From bd8260764596f8b9f221ef1be61d18c3b3402066 Mon Sep 17 00:00:00 2001 From: Bohan Jiang <52446949+Bohan-J@users.noreply.github.com> Date: Wed, 29 Apr 2026 17:17:09 +0800 Subject: [PATCH] fix(execenv): default-disable Codex native multi-agent in per-task config (#1845) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(execenv): default-disable Codex native multi-agent in per-task config Recent Codex app-server releases enable features.multi_agent by default, exposing spawn_agent / wait / close_agent tools that let a parent thread spawn nested subagents. The daemon currently models only the parent thread, so the parent's turn/completed is treated as task completion even when spawned children are still running — leading to premature task completion and dropped child output. Disable features.multi_agent by default in the per-task CODEX_HOME/config.toml so Multica's task lifecycle is the only orchestration layer in play. Strip both the dotted-key form (features.multi_agent) at TOML root and the multi_agent key inside a [features] table; siblings and unrelated tables are preserved. Honor MULTICA_CODEX_MULTI_AGENT=1 as an opt-out for users who explicitly want Codex native subagents inside a Multica task. The user's global ~/.codex/config.toml is never modified — only the daemon's isolated per-task copy. Also widen managedBlockRe to consume `\n*` rather than `\n?` so reruns don't accumulate blank lines when both the sandbox and multi-agent managed blocks coexist. * fix(execenv): inject managed multi_agent inside existing [features] table Per PR review (codex_multi_agent.go:77-83 vs :112-115): when the user's config.toml already has a top-level `[features]` table, writing `features.multi_agent = false` at the TOML root implicitly redefines the same `features` table. The strict TOML parser used by Codex (`toml-rs`) rejects that with `table 'features' already exists`, so Codex would fail to load the per-task config and refuse to start the thread. Verified the strict-parser failure with pelletier/go-toml/v2; the previous BurntSushi/toml-based regression test was permissive enough to miss it. Detect a root-level `[features]` header and place the managed block inside that table (`multi_agent = false` with marker comments). When no such header exists, keep the existing root-level dotted-key form. The managed-block regex matches both layouts so reruns and layout transitions stay idempotent. A `[features.experimental]` sub-table without a bare `[features]` header still uses the root dotted-key form, which is spec-valid (no explicit redefinition). Tests now use pelletier/go-toml/v2 to actually parse the output and assert features.multi_agent decodes to false; the regression case from the PR review is covered explicitly. * fix(execenv): recognize feature table header variants --------- Co-authored-by: Devv --- server/go.mod | 1 + server/go.sum | 2 + server/internal/daemon/execenv/codex_home.go | 8 + .../daemon/execenv/codex_multi_agent.go | 238 ++++++++ .../daemon/execenv/codex_multi_agent_test.go | 569 ++++++++++++++++++ .../internal/daemon/execenv/codex_sandbox.go | 6 +- 6 files changed, 822 insertions(+), 2 deletions(-) create mode 100644 server/internal/daemon/execenv/codex_multi_agent.go create mode 100644 server/internal/daemon/execenv/codex_multi_agent_test.go diff --git a/server/go.mod b/server/go.mod index 9b4d40d16..89108c14e 100644 --- a/server/go.mod +++ b/server/go.mod @@ -17,6 +17,7 @@ require ( github.com/lmittmann/tint v1.1.3 github.com/mattn/go-shellwords v1.0.13 github.com/oklog/ulid/v2 v2.1.1 + github.com/pelletier/go-toml/v2 v2.3.0 github.com/prometheus/client_golang v1.23.2 github.com/redis/go-redis/v9 v9.18.0 github.com/resend/resend-go/v2 v2.28.0 diff --git a/server/go.sum b/server/go.sum index 7cda863d3..187864046 100644 --- a/server/go.sum +++ b/server/go.sum @@ -94,6 +94,8 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8m github.com/oklog/ulid/v2 v2.1.1 h1:suPZ4ARWLOJLegGFiZZ1dFAkqzhMjL3J1TzI+5wHz8s= github.com/oklog/ulid/v2 v2.1.1/go.mod h1:rcEKHmBBKfef9DhnvX7y1HZBYxjXb0cP5ExxNsTT1QQ= github.com/pborman/getopt v0.0.0-20170112200414-7148bc3a4c30/go.mod h1:85jBQOZwpVEaDAr341tbn15RS4fCAsIst0qp7i8ex1o= +github.com/pelletier/go-toml/v2 v2.3.0 h1:k59bC/lIZREW0/iVaQR8nDHxVq8OVlIzYCOJf421CaM= +github.com/pelletier/go-toml/v2 v2.3.0/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= diff --git a/server/internal/daemon/execenv/codex_home.go b/server/internal/daemon/execenv/codex_home.go index 46b695a4f..6508d58e1 100644 --- a/server/internal/daemon/execenv/codex_home.go +++ b/server/internal/daemon/execenv/codex_home.go @@ -111,6 +111,14 @@ func prepareCodexHomeWithOpts(codexHome string, opts CodexHomeOptions, logger *s logger.Warn("execenv: codex-home ensure sandbox config failed", "error", err) } + // Disable Codex native multi-agent inside daemon-managed task sessions + // so the parent thread's `turn/completed` is not interpreted as task + // completion while spawned subagents are still running. See + // codex_multi_agent.go for the full rationale and escape hatch. + if err := ensureCodexMultiAgentConfig(filepath.Join(codexHome, "config.toml"), logger); err != nil { + logger.Warn("execenv: codex-home ensure multi-agent config failed", "error", err) + } + return nil } diff --git a/server/internal/daemon/execenv/codex_multi_agent.go b/server/internal/daemon/execenv/codex_multi_agent.go new file mode 100644 index 000000000..99a007276 --- /dev/null +++ b/server/internal/daemon/execenv/codex_multi_agent.go @@ -0,0 +1,238 @@ +package execenv + +import ( + "fmt" + "log/slog" + "os" + "regexp" + "strings" +) + +// Background +// +// Recent Codex `app-server` releases enable `features.multi_agent` by +// default, exposing spawn_agent / send_input / wait / resume_agent / +// close_agent tools to the model so a Codex thread can fan out into nested +// subagents. The Multica daemon currently models only the parent Codex +// thread per task: when the parent emits `turn/completed`, the task is +// marked terminal even if spawned children are still running or have not +// flushed their output. The result is a class of premature-completion +// failures where useful child-agent work is dropped. +// +// Until either Codex exposes a "parent done but children still open" +// lifecycle state with drain/cancel primitives, or the Multica runtime +// models child threads as first-class task entities, the daemon disables +// Codex native multi-agent by default for daemon-managed task sessions. +// The override only touches the per-task `CODEX_HOME/config.toml`; the +// user's global `~/.codex/config.toml` is never modified. +// +// Users who explicitly want Codex native subagents inside a Multica task +// (and accept the lifecycle risk) can keep the feature enabled by setting +// `MULTICA_CODEX_MULTI_AGENT=1` in the daemon environment. +// +// Layout note +// +// TOML rejects redefining a table that has already been created — including +// implicitly via a dotted key. So the managed block must adapt to the +// user's existing config: +// +// - If the user's config contains a top-level `[features]` table, the +// managed `multi_agent = false` is injected INSIDE that table (with +// marker comments). Writing `features.multi_agent = false` at the +// TOML root would implicitly redefine the same `features` table and +// the strict TOML parser used by Codex (`toml-rs`) would fail with +// `table 'features' already exists`. +// - Otherwise, the managed block lives at the top of the file with the +// dotted-key form `features.multi_agent = false`. + +// MulticaCodexMultiAgentEnv is the env var users can set to keep Codex +// native multi-agent enabled inside daemon-managed tasks. Anything truthy +// (1, true, yes, on; case-insensitive) keeps the feature on; everything +// else (including unset) disables it. +const MulticaCodexMultiAgentEnv = "MULTICA_CODEX_MULTI_AGENT" + +// multicaMultiAgentBeginMarker / multicaMultiAgentEndMarker delimit the +// multi-agent-specific managed block. Kept separate from the sandbox +// block so each can evolve and migrate independently. +const ( + multicaMultiAgentBeginMarker = "# BEGIN multica-managed multi-agent (do not edit; regenerated by daemon)" + multicaMultiAgentEndMarker = "# END multica-managed multi-agent" +) + +// `\n*` rather than `\n?` so reruns don't accumulate blank lines when this +// block coexists with the sandbox managed block in the same file. The same +// regex matches the block whether it sits at the file root (dotted-key +// form) or inside a `[features]` table — only the body keys differ. +var multiAgentBlockRe = regexp.MustCompile( + `(?ms)^` + regexp.QuoteMeta(multicaMultiAgentBeginMarker) + + `.*?^` + regexp.QuoteMeta(multicaMultiAgentEndMarker) + `\n*`) + +var ( + // matches a top-level `[features]` table header, allowing TOML's optional + // whitespace inside brackets and inline comments after the header. + rootFeaturesTableHeaderRe = regexp.MustCompile(`^\s*\[\s*features\s*\]\s*(?:#.*)?$`) + // matches `multi_agent = ...` (with optional whitespace) inside a + // `[features]` table. + featuresTableMultiAgentRe = regexp.MustCompile(`^\s*multi_agent\s*=`) + // matches `features.multi_agent = ...` at the TOML root (top-level + // dotted-key form, including TOML's optional whitespace around dots). + rootDottedMultiAgentRe = regexp.MustCompile(`^\s*features\s*\.\s*multi_agent\s*=`) +) + +// codexMultiAgentEnabled reports whether the user opted into keeping Codex +// native multi-agent on for daemon-managed tasks. +func codexMultiAgentEnabled() bool { + raw := strings.TrimSpace(os.Getenv(MulticaCodexMultiAgentEnv)) + switch strings.ToLower(raw) { + case "1", "true", "yes", "on": + return true + } + return false +} + +// renderMulticaMultiAgentBlock returns the daemon-managed multi-agent +// block. The body uses `multi_agent = false` when injected inside a +// `[features]` table, and `features.multi_agent = false` otherwise. +func renderMulticaMultiAgentBlock(inFeaturesTable bool) string { + var b strings.Builder + b.WriteString(multicaMultiAgentBeginMarker) + b.WriteString("\n") + if inFeaturesTable { + b.WriteString("multi_agent = false\n") + } else { + b.WriteString("features.multi_agent = false\n") + } + b.WriteString(multicaMultiAgentEndMarker) + b.WriteString("\n") + return b.String() +} + +// stripUserMultiAgentDirectives removes any `features.multi_agent = ...` +// line at the TOML root (dotted-key form), plus any `multi_agent = ...` +// line that sits inside a top-level `[features]` table. Both forms encode +// the same TOML key and would conflict with the managed block. +// +// Other tables (`[features.experimental]`, `[profiles.foo]`, ...) are +// preserved untouched: they live under their own scope and don't redefine +// `features.multi_agent` at the root. +func stripUserMultiAgentDirectives(content string) string { + lines := strings.Split(content, "\n") + out := make([]string, 0, len(lines)) + currentTable := "" // empty = TOML root + for _, line := range lines { + trimmed := strings.TrimSpace(line) + if rootFeaturesTableHeaderRe.MatchString(line) { + currentTable = "[features]" + out = append(out, line) + continue + } + if strings.HasPrefix(trimmed, "[") { + currentTable = trimmed + out = append(out, line) + continue + } + switch currentTable { + case "": + if rootDottedMultiAgentRe.MatchString(trimmed) { + continue + } + case "[features]": + if featuresTableMultiAgentRe.MatchString(trimmed) { + continue + } + } + out = append(out, line) + } + return strings.Join(out, "\n") +} + +// hasRootFeaturesTable reports whether the file contains a top-level +// `[features]` table header. Sub-tables like `[features.experimental]` do +// NOT count: they implicitly create `features` but don't conflict with a +// root-level `features.multi_agent` dotted key. +func hasRootFeaturesTable(content string) bool { + for _, line := range strings.Split(content, "\n") { + if rootFeaturesTableHeaderRe.MatchString(line) { + return true + } + } + return false +} + +// injectManagedBlockIntoFeaturesTable inserts the in-table managed block +// immediately after the first `[features]` header line. Caller must have +// already stripped any prior managed block and any user-set `multi_agent` +// directive from inside the table. +func injectManagedBlockIntoFeaturesTable(content string) string { + block := renderMulticaMultiAgentBlock(true) + // Drop the trailing `\n` so we don't introduce a stray blank line when + // splicing block lines between existing lines. + blockLines := strings.Split(strings.TrimRight(block, "\n"), "\n") + lines := strings.Split(content, "\n") + for i, line := range lines { + if !rootFeaturesTableHeaderRe.MatchString(line) { + continue + } + out := make([]string, 0, len(lines)+len(blockLines)) + out = append(out, lines[:i+1]...) + out = append(out, blockLines...) + out = append(out, lines[i+1:]...) + return strings.Join(out, "\n") + } + return content +} + +// ensureCodexMultiAgentConfig writes the daemon-managed multi-agent block +// into the per-task config.toml so Codex native subagents stay disabled. +// Idempotent: running it twice produces the same file. +// +// When MULTICA_CODEX_MULTI_AGENT is set to a truthy value, the function is +// a no-op — the user has explicitly opted into Codex native subagents and +// accepts the lifecycle risk. Toggling the env var across prepare runs is +// not supported: the per-task config is short-lived (recreated per task), +// so users should set the var once at daemon start. +func ensureCodexMultiAgentConfig(configPath string, logger *slog.Logger) error { + if codexMultiAgentEnabled() { + if logger != nil { + logger.Info("codex multi-agent: leaving Codex native multi-agent untouched per MULTICA_CODEX_MULTI_AGENT", + "config_path", configPath, + ) + } + return nil + } + + data, err := os.ReadFile(configPath) + if err != nil && !os.IsNotExist(err) { + return fmt.Errorf("read config.toml: %w", err) + } + existing := string(data) + + // Always strip any previously written managed block (root or in-table + // form) so reruns and layout transitions stay clean. + existing = multiAgentBlockRe.ReplaceAllString(existing, "") + // Strip user-set directives in both encodings; the managed block re-adds + // the canonical form below. + existing = stripUserMultiAgentDirectives(existing) + + var updated string + if hasRootFeaturesTable(existing) { + updated = injectManagedBlockIntoFeaturesTable(existing) + } else { + existing = strings.TrimLeft(existing, "\n") + block := renderMulticaMultiAgentBlock(false) + if existing == "" { + updated = block + } else { + updated = block + "\n" + existing + } + } + + if updated == string(data) { + return nil + } + + if err := os.WriteFile(configPath, []byte(updated), 0o644); err != nil { + return fmt.Errorf("write config.toml: %w", err) + } + return nil +} diff --git a/server/internal/daemon/execenv/codex_multi_agent_test.go b/server/internal/daemon/execenv/codex_multi_agent_test.go new file mode 100644 index 000000000..a3400f992 --- /dev/null +++ b/server/internal/daemon/execenv/codex_multi_agent_test.go @@ -0,0 +1,569 @@ +package execenv + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/pelletier/go-toml/v2" +) + +// parseTOML decodes the given content with a spec-strict parser. Codex's +// `toml-rs` follows the same strict semantics, so a config that parses +// here will load in Codex. +func parseTOML(t *testing.T, content string) map[string]any { + t.Helper() + var parsed map[string]any + if err := toml.Unmarshal([]byte(content), &parsed); err != nil { + t.Fatalf("expected valid TOML, got parser error: %v\n--- file ---\n%s", err, content) + } + return parsed +} + +// requireMultiAgentDisabled asserts that the parsed config has +// features.multi_agent set to false. +func requireMultiAgentDisabled(t *testing.T, parsed map[string]any) { + t.Helper() + features, ok := parsed["features"].(map[string]any) + if !ok { + t.Fatalf("expected `features` table in parsed config, got: %#v", parsed["features"]) + } + v, ok := features["multi_agent"].(bool) + if !ok { + t.Fatalf("expected features.multi_agent to be a bool, got: %#v", features["multi_agent"]) + } + if v { + t.Errorf("expected features.multi_agent = false, got true") + } +} + +func TestStripUserMultiAgentDirectives(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + in string + want string + }{ + { + name: "drops top-level dotted-key form", + in: `model = "o3" +features.multi_agent = true + +[profiles.default] +model = "o3" +`, + want: `model = "o3" + +[profiles.default] +model = "o3" +`, + }, + { + name: "drops top-level dotted-key form with whitespace", + in: `model = "o3" +features . multi_agent = true + +[profiles.default] +model = "o3" +`, + want: `model = "o3" + +[profiles.default] +model = "o3" +`, + }, + { + name: "drops multi_agent inside [features] table", + in: `[features] +multi_agent = true +experimental_foo = true + +[profiles.default] +model = "o3" +`, + want: `[features] +experimental_foo = true + +[profiles.default] +model = "o3" +`, + }, + { + name: "drops multi_agent inside [features] table with inline comment", + in: `[features] # user feature flags +multi_agent = true +experimental_foo = true +`, + want: `[features] # user feature flags +experimental_foo = true +`, + }, + { + name: "drops multi_agent inside spaced [ features ] table", + in: `[ features ] +multi_agent = true +experimental_foo = true +`, + want: `[ features ] +experimental_foo = true +`, + }, + { + name: "preserves multi_agent under unrelated table", + in: `[profiles.experimental] +multi_agent = true +`, + want: `[profiles.experimental] +multi_agent = true +`, + }, + { + name: "preserves multi_agent under nested [features.experimental]", + in: `[features.experimental] +multi_agent = true +`, + want: `[features.experimental] +multi_agent = true +`, + }, + { + name: "no multi_agent — content unchanged", + in: `model = "o3" + +[profiles.default] +model = "o3" +`, + want: `model = "o3" + +[profiles.default] +model = "o3" +`, + }, + { + name: "drops both forms simultaneously", + in: `features.multi_agent = true + +[features] +multi_agent = false +something_else = "keep" +`, + want: ` +[features] +something_else = "keep" +`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := stripUserMultiAgentDirectives(tt.in) + if got != tt.want { + t.Errorf("stripUserMultiAgentDirectives mismatch\n--- got ---\n%s\n--- want ---\n%s", got, tt.want) + } + }) + } +} + +func TestEnsureCodexMultiAgentConfigEmptyFile(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + configPath := filepath.Join(dir, "config.toml") + + if err := ensureCodexMultiAgentConfig(configPath, nil); err != nil { + t.Fatalf("ensureCodexMultiAgentConfig failed: %v", err) + } + + data, err := os.ReadFile(configPath) + if err != nil { + t.Fatalf("read result: %v", err) + } + got := string(data) + if !strings.Contains(got, "features.multi_agent = false") { + t.Errorf("expected managed block to set features.multi_agent = false at root, got:\n%s", got) + } + if !strings.Contains(got, multicaMultiAgentBeginMarker) { + t.Errorf("expected begin marker, got:\n%s", got) + } + if !strings.Contains(got, multicaMultiAgentEndMarker) { + t.Errorf("expected end marker, got:\n%s", got) + } + requireMultiAgentDisabled(t, parseTOML(t, got)) +} + +func TestEnsureCodexMultiAgentConfigDottedKey(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + configPath := filepath.Join(dir, "config.toml") + original := `model = "o3" +features.multi_agent = true + +[profiles.default] +model = "o3" +` + if err := os.WriteFile(configPath, []byte(original), 0o644); err != nil { + t.Fatalf("write fixture: %v", err) + } + + if err := ensureCodexMultiAgentConfig(configPath, nil); err != nil { + t.Fatalf("ensureCodexMultiAgentConfig failed: %v", err) + } + + data, _ := os.ReadFile(configPath) + got := string(data) + if strings.Contains(got, "features.multi_agent = true") { + t.Errorf("expected user features.multi_agent = true to be stripped, got:\n%s", got) + } + if !strings.Contains(got, "features.multi_agent = false") { + t.Errorf("expected managed features.multi_agent = false at root, got:\n%s", got) + } + if !strings.Contains(got, `[profiles.default]`) || !strings.Contains(got, `model = "o3"`) { + t.Errorf("expected unrelated content preserved, got:\n%s", got) + } + requireMultiAgentDisabled(t, parseTOML(t, got)) +} + +func TestEnsureCodexMultiAgentConfigDottedKeyWithWhitespace(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + configPath := filepath.Join(dir, "config.toml") + original := `model = "o3" +features . multi_agent = true +` + if err := os.WriteFile(configPath, []byte(original), 0o644); err != nil { + t.Fatalf("write fixture: %v", err) + } + + if err := ensureCodexMultiAgentConfig(configPath, nil); err != nil { + t.Fatalf("ensureCodexMultiAgentConfig failed: %v", err) + } + + data, _ := os.ReadFile(configPath) + got := string(data) + if strings.Contains(got, "features . multi_agent = true") { + t.Errorf("expected user features . multi_agent = true to be stripped, got:\n%s", got) + } + requireMultiAgentDisabled(t, parseTOML(t, got)) +} + +func TestEnsureCodexMultiAgentConfigFeaturesTable(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + configPath := filepath.Join(dir, "config.toml") + original := `[features] +multi_agent = true +experimental_thinking = true + +[profiles.default] +model = "o3" +` + if err := os.WriteFile(configPath, []byte(original), 0o644); err != nil { + t.Fatalf("write fixture: %v", err) + } + + if err := ensureCodexMultiAgentConfig(configPath, nil); err != nil { + t.Fatalf("ensureCodexMultiAgentConfig failed: %v", err) + } + + data, _ := os.ReadFile(configPath) + got := string(data) + + // User's `multi_agent = true` must be gone, our managed `multi_agent = false` + // must be inside the [features] table (NOT at the root as a dotted key, + // which would redefine the table and break the strict TOML parser). + if strings.Contains(got, "multi_agent = true") { + t.Errorf("expected user multi_agent = true to be stripped, got:\n%s", got) + } + if strings.Contains(got, "features.multi_agent = false") { + t.Errorf("managed block must NOT use root dotted-key form when [features] table exists (would redefine the table); got:\n%s", got) + } + if !strings.Contains(got, "[features]") { + t.Errorf("expected [features] header preserved, got:\n%s", got) + } + if !strings.Contains(got, "experimental_thinking = true") { + t.Errorf("expected sibling features.* keys preserved, got:\n%s", got) + } + + // Output must parse as valid TOML and have features.multi_agent = false. + requireMultiAgentDisabled(t, parseTOML(t, got)) +} + +func TestEnsureCodexMultiAgentConfigFeaturesTableHeaderVariants(t *testing.T) { + t.Parallel() + + cases := map[string]string{ + "inline_comment": `[features] # user feature flags +multi_agent = true +experimental_thinking = true +`, + "spaced_header": `[ features ] +multi_agent = true +experimental_thinking = true +`, + } + + for name, original := range cases { + t.Run(name, func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + configPath := filepath.Join(dir, "config.toml") + if err := os.WriteFile(configPath, []byte(original), 0o644); err != nil { + t.Fatalf("write fixture: %v", err) + } + + if err := ensureCodexMultiAgentConfig(configPath, nil); err != nil { + t.Fatalf("ensureCodexMultiAgentConfig failed: %v", err) + } + + data, _ := os.ReadFile(configPath) + got := string(data) + if strings.Contains(got, "features.multi_agent = false") { + t.Errorf("managed block must NOT use root dotted-key form when [features] table exists; got:\n%s", got) + } + if strings.Contains(got, "multi_agent = true") { + t.Errorf("expected user multi_agent = true to be stripped, got:\n%s", got) + } + + parsed := parseTOML(t, got) + requireMultiAgentDisabled(t, parsed) + features := parsed["features"].(map[string]any) + if v, _ := features["experimental_thinking"].(bool); !v { + t.Errorf("expected user's features.experimental_thinking preserved, got %v", features["experimental_thinking"]) + } + }) + } +} + +func TestEnsureCodexMultiAgentConfigFeaturesTableEmpty(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + configPath := filepath.Join(dir, "config.toml") + original := `[features] +` + if err := os.WriteFile(configPath, []byte(original), 0o644); err != nil { + t.Fatalf("write fixture: %v", err) + } + + if err := ensureCodexMultiAgentConfig(configPath, nil); err != nil { + t.Fatalf("ensureCodexMultiAgentConfig failed: %v", err) + } + + data, _ := os.ReadFile(configPath) + requireMultiAgentDisabled(t, parseTOML(t, string(data))) +} + +func TestEnsureCodexMultiAgentConfigFeaturesSubtableOnly(t *testing.T) { + t.Parallel() + + // User has [features.experimental] but no bare [features] header. The + // dotted-key form at root is fine — both implicitly define `features`, + // neither defines `[features]` explicitly, so no redefinition. + dir := t.TempDir() + configPath := filepath.Join(dir, "config.toml") + original := `[features.experimental] +thinking = true +` + if err := os.WriteFile(configPath, []byte(original), 0o644); err != nil { + t.Fatalf("write fixture: %v", err) + } + + if err := ensureCodexMultiAgentConfig(configPath, nil); err != nil { + t.Fatalf("ensureCodexMultiAgentConfig failed: %v", err) + } + + data, _ := os.ReadFile(configPath) + got := string(data) + if !strings.Contains(got, "features.multi_agent = false") { + t.Errorf("expected root dotted-key form when only sub-tables exist, got:\n%s", got) + } + + parsed := parseTOML(t, got) + requireMultiAgentDisabled(t, parsed) + features := parsed["features"].(map[string]any) + exp, _ := features["experimental"].(map[string]any) + if v, _ := exp["thinking"].(bool); !v { + t.Errorf("expected features.experimental.thinking preserved, got: %#v", exp) + } +} + +func TestEnsureCodexMultiAgentConfigIdempotent(t *testing.T) { + t.Parallel() + + cases := map[string]string{ + "root_form": `model = "o3" +features.multi_agent = true +`, + "in_features_table": `[features] +multi_agent = true +experimental_thinking = true +`, + "in_features_table_with_other_keys": `[features] +experimental_thinking = true + +[profiles.default] +model = "o3" +`, + } + for name, original := range cases { + t.Run(name, func(t *testing.T) { + t.Parallel() + dir := t.TempDir() + configPath := filepath.Join(dir, "config.toml") + if err := os.WriteFile(configPath, []byte(original), 0o644); err != nil { + t.Fatalf("write fixture: %v", err) + } + + if err := ensureCodexMultiAgentConfig(configPath, nil); err != nil { + t.Fatalf("first run failed: %v", err) + } + first, _ := os.ReadFile(configPath) + infoFirst, _ := os.Stat(configPath) + + if err := ensureCodexMultiAgentConfig(configPath, nil); err != nil { + t.Fatalf("second run failed: %v", err) + } + second, _ := os.ReadFile(configPath) + infoSecond, _ := os.Stat(configPath) + + if string(first) != string(second) { + t.Errorf("expected idempotent rewrite\n--- first ---\n%s\n--- second ---\n%s", first, second) + } + if !infoSecond.ModTime().Equal(infoFirst.ModTime()) { + t.Errorf("expected no rewrite on second pass (file was touched)") + } + // Final output must parse as valid TOML. + requireMultiAgentDisabled(t, parseTOML(t, string(second))) + }) + } +} + +func TestEnsureCodexMultiAgentConfigEscapeHatch(t *testing.T) { + // Cannot run in parallel: mutates process env. + dir := t.TempDir() + configPath := filepath.Join(dir, "config.toml") + original := `model = "o3" +features.multi_agent = true +` + if err := os.WriteFile(configPath, []byte(original), 0o644); err != nil { + t.Fatalf("write fixture: %v", err) + } + + t.Setenv(MulticaCodexMultiAgentEnv, "1") + + if err := ensureCodexMultiAgentConfig(configPath, nil); err != nil { + t.Fatalf("ensureCodexMultiAgentConfig failed: %v", err) + } + + data, _ := os.ReadFile(configPath) + got := string(data) + if got != original { + t.Errorf("expected file untouched when escape hatch set\n--- got ---\n%s\n--- want ---\n%s", got, original) + } +} + +func TestCodexMultiAgentEnabledTruthy(t *testing.T) { + for _, v := range []string{"1", "true", "TRUE", "yes", "On"} { + t.Run(v, func(t *testing.T) { + t.Setenv(MulticaCodexMultiAgentEnv, v) + if !codexMultiAgentEnabled() { + t.Errorf("expected %q to be truthy", v) + } + }) + } +} + +func TestCodexMultiAgentEnabledFalsy(t *testing.T) { + for _, v := range []string{"", "0", "false", "no", "off", "anything else"} { + t.Run(v, func(t *testing.T) { + t.Setenv(MulticaCodexMultiAgentEnv, v) + if codexMultiAgentEnabled() { + t.Errorf("expected %q to be falsy", v) + } + }) + } +} + +func TestEnsureCodexMultiAgentConfigCoexistsWithSandboxBlock(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + configPath := filepath.Join(dir, "config.toml") + original := `model = "o3" +features.multi_agent = true +` + if err := os.WriteFile(configPath, []byte(original), 0o644); err != nil { + t.Fatalf("write fixture: %v", err) + } + + policy := codexSandboxPolicy{Mode: "workspace-write", NetworkAccess: true, Reason: "test"} + if err := ensureCodexSandboxConfig(configPath, policy, "0.121.0", nil); err != nil { + t.Fatalf("ensureCodexSandboxConfig failed: %v", err) + } + if err := ensureCodexMultiAgentConfig(configPath, nil); err != nil { + t.Fatalf("ensureCodexMultiAgentConfig failed: %v", err) + } + + data, _ := os.ReadFile(configPath) + got := string(data) + if !strings.Contains(got, multicaManagedBeginMarker) { + t.Errorf("expected sandbox managed block, got:\n%s", got) + } + if !strings.Contains(got, multicaMultiAgentBeginMarker) { + t.Errorf("expected multi-agent managed block, got:\n%s", got) + } + if strings.Contains(got, "features.multi_agent = true") { + t.Errorf("expected user features.multi_agent = true to be stripped, got:\n%s", got) + } + + // File must parse as valid TOML and have multi_agent disabled. + requireMultiAgentDisabled(t, parseTOML(t, got)) + + // Re-running both should be idempotent. + if err := ensureCodexSandboxConfig(configPath, policy, "0.121.0", nil); err != nil { + t.Fatalf("ensureCodexSandboxConfig (rerun) failed: %v", err) + } + if err := ensureCodexMultiAgentConfig(configPath, nil); err != nil { + t.Fatalf("ensureCodexMultiAgentConfig (rerun) failed: %v", err) + } + dataAfter, _ := os.ReadFile(configPath) + if string(dataAfter) != got { + t.Errorf("expected idempotent combined rewrite\n--- first ---\n%s\n--- second ---\n%s", got, dataAfter) + } +} + +// Regression for PR #1845 review: when the user's config has a `[features]` +// table, naively writing `features.multi_agent = false` at the TOML root +// implicitly redefines the same table. The strict TOML parser used by +// Codex (`toml-rs`) rejects that with `table 'features' already exists`. +func TestRegressionFeaturesTableProducesValidTOML(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + configPath := filepath.Join(dir, "config.toml") + original := `[features] +experimental_thinking = true +` + if err := os.WriteFile(configPath, []byte(original), 0o644); err != nil { + t.Fatalf("write fixture: %v", err) + } + + if err := ensureCodexMultiAgentConfig(configPath, nil); err != nil { + t.Fatalf("ensureCodexMultiAgentConfig failed: %v", err) + } + + data, _ := os.ReadFile(configPath) + parsed := parseTOML(t, string(data)) + requireMultiAgentDisabled(t, parsed) + + features := parsed["features"].(map[string]any) + if v, _ := features["experimental_thinking"].(bool); !v { + t.Errorf("expected user's features.experimental_thinking preserved, got %v", features["experimental_thinking"]) + } +} diff --git a/server/internal/daemon/execenv/codex_sandbox.go b/server/internal/daemon/execenv/codex_sandbox.go index b9403d28e..bfea36f82 100644 --- a/server/internal/daemon/execenv/codex_sandbox.go +++ b/server/internal/daemon/execenv/codex_sandbox.go @@ -141,10 +141,12 @@ func renderMulticaManagedBlock(policy codexSandboxPolicy) string { } // managedBlockRe captures the daemon-owned block (including the surrounding -// markers) so it can be replaced idempotently. +// markers and any trailing blank lines) so it can be replaced idempotently. +// `\n*` rather than `\n?` so reruns don't accumulate blank lines when the +// block coexists with another managed block (e.g. multi-agent) in the file. var managedBlockRe = regexp.MustCompile( `(?ms)^` + regexp.QuoteMeta(multicaManagedBeginMarker) + - `.*?^` + regexp.QuoteMeta(multicaManagedEndMarker) + `\n?`) + `.*?^` + regexp.QuoteMeta(multicaManagedEndMarker) + `\n*`) // upsertMulticaManagedBlock returns the config content with the multica-managed // block placed at the very top of the file. Any previously written managed