Compare commits

...

2 Commits

Author SHA1 Message Date
Jiang Bohan
b6d6e1b029 fix(daemon/execenv): drop per-task Codex copy when shared source removed (MUL-2646)
Extend the MUL-2646 fix to the deletion arm of "sync the shared source":
`syncCopiedFile` (renamed from `copyFileIfExists`) now also removes the
per-task `dst` when the shared `src` is absent. The prior version
short-circuited on missing src and left `config.toml` / `config.json` /
`instructions.md` from the previous Prepare lingering in the per-task
home — so a user who removed a provider by deleting `~/.codex/config.toml`,
or pulled `config.json` / `instructions.md` out of the shared home, would
keep replaying the stale copy on session resume.

For `config.toml` the subsequent `ensureCodex{Sandbox,MultiAgent,Memory}Config`
passes recreate the file with only the daemon-managed default blocks, so
removing the shared file cleanly drops every user-managed
`[model_providers.X]` / `model_provider` line. For `config.json` and
`instructions.md` there is no daemon default, so they disappear in
lockstep with the shared source.

Adds `TestPrepareCodexHome_DropsCopiedConfigWhenSharedSourceRemoved`
covering the new path, and extends the refresh-arm test to assert the
multi-agent / memory marker blocks are still present after the copy is
refreshed.

Co-authored-by: multica-agent <github@multica.ai>
2026-05-26 14:46:40 +08:00
Jiang Bohan
3b0de68cd7 fix(daemon/execenv): refresh stale Codex config copies across env reuse (MUL-2646)
`copyFileIfExists` previously short-circuited whenever the per-task
`codex-home/{config.toml,config.json,instructions.md}` already existed,
so once the files were seeded at first Prepare they were never refreshed
again — even though `Reuse()` calls `prepareCodexHomeWithOpts` on every
resume. A user who rotated their Codex `~/.codex/config.toml` between
runs (e.g. switching the active `[model_providers.X]` `base_url`, or
pointing `env_key` at a freshly rotated API key) kept reading the stale
per-task copy on session resume. Codex then issued requests to the new
URL using the old key and the API rejected the token.

Treat any existing `dst` as something to drop and re-copy from the
current shared source, mirroring the symlink path that already refreshes
`auth.json` (#2126). The daemon-managed sandbox / multi-agent / memory
blocks are applied via marker-bracketed idempotent passes after the
copy, so a re-copy + re-ensure cycle preserves them.

Co-authored-by: multica-agent <github@multica.ai>
2026-05-26 14:38:11 +08:00
2 changed files with 234 additions and 11 deletions

View File

@@ -86,12 +86,12 @@ func prepareCodexHomeWithOpts(codexHome string, opts CodexHomeOptions, logger *s
// into a stale local copy.
logCodexAuthState(filepath.Join(codexHome, "auth.json"), logger)
// Copy config files (isolated per task).
// Sync config files from the shared source (isolated per task).
for _, name := range codexCopiedFiles {
src := filepath.Join(sharedHome, name)
dst := filepath.Join(codexHome, name)
if err := copyFileIfExists(src, dst); err != nil {
logger.Warn("execenv: codex-home copy failed", "file", name, "error", err)
if err := syncCopiedFile(src, dst); err != nil {
logger.Warn("execenv: codex-home sync failed", "file", name, "error", err)
}
}
@@ -271,18 +271,47 @@ func logCodexAuthState(authPath string, logger *slog.Logger) {
// codex_sandbox.go's ensureCodexSandboxConfig so they can be updated
// idempotently without touching user-managed keys.)
// copyFileIfExists copies src to dst. If src doesn't exist, it's a no-op.
// If dst already exists, it's not overwritten.
func copyFileIfExists(src, dst string) error {
if _, err := os.Stat(src); os.IsNotExist(err) {
return nil
// syncCopiedFile mirrors a per-task dst onto the current state of the shared
// src so the per-task copy tracks the shared source across Reuse() runs:
//
// - src present, dst absent: copy src → dst
// - src present, dst present: drop dst and re-copy src → dst (refresh)
// - src absent, dst present: drop dst (the shared source has been removed,
// so the per-task stale copy must not linger)
// - src absent, dst absent: no-op
//
// Regression for MUL-2646: the prior "don't overwrite" guard left per-task
// config.toml / config.json / instructions.md stuck on whatever snapshot they
// were seeded with at first Prepare. A user who edited ~/.codex/config.toml
// between runs — switching the active [model_providers.X] base_url, pointing
// env_key at a freshly rotated API key, or removing the file outright to
// drop a provider — kept hitting the stale per-task copy on session resume,
// with Codex calling the new URL using the old key (or replaying a provider
// the user had since deleted from the shared config).
//
// For config.toml the subsequent ensureCodex{Sandbox,MultiAgent,Memory}Config
// passes recreate the file from scratch when the shared source is gone, so
// the per-task home keeps the daemon-managed defaults but loses every
// user-managed [model_providers.X] / model_provider line that no longer
// exists in the shared config. For config.json / instructions.md there is
// no daemon-managed default, so they simply disappear in lockstep with the
// shared source.
func syncCopiedFile(src, dst string) error {
_, srcErr := os.Stat(src)
srcMissing := os.IsNotExist(srcErr)
if srcErr != nil && !srcMissing {
return fmt.Errorf("stat src %s: %w", src, srcErr)
}
// Don't overwrite existing file.
if _, err := os.Stat(dst); err == nil {
return nil
if _, err := os.Lstat(dst); err == nil {
if err := os.Remove(dst); err != nil {
return fmt.Errorf("remove stale dst %s: %w", dst, err)
}
}
if srcMissing {
return nil
}
return copyFile(src, dst)
}

View File

@@ -1751,6 +1751,200 @@ func TestPrepareCodexHome_RefreshesStaleAuthCopyOnReuse(t *testing.T) {
}
}
// TestPrepareCodexHome_RefreshesStaleCopiedConfigOnReuse is the MUL-2646
// regression test for the refresh arm. The shared Codex home is seeded, the
// per-task codex-home is prepared once, the shared config.toml / config.json /
// instructions.md are then rewritten (new provider, base_url, env_key, model
// and instructions), and the same per-task home is prepared a second time as
// a session resume would. The per-task copies must carry the new contents and
// none of the old ones; otherwise Codex would keep using the old provider
// URL / env var and the API would reject the token. This mirrors the
// symlinked auth.json refresh test covering issue #2081.
func TestPrepareCodexHome_RefreshesStaleCopiedConfigOnReuse(t *testing.T) {
	// t.Setenv is used below, which rules out t.Parallel().
	const (
		staleTOML = `model_provider = "old-provider"
[model_providers.old-provider]
name = "Old"
base_url = "https://old.example.com"
env_key = "OLD_API_KEY"
`
		freshTOML = `model_provider = "new-provider"
[model_providers.new-provider]
name = "New"
base_url = "https://new.example.com"
env_key = "NEW_API_KEY"
`
	)

	sharedHome := t.TempDir()
	// writeShared (re)writes one file in the shared home and aborts the test
	// with the supplied message format when the write fails.
	writeShared := func(name, body, failFmt string) {
		t.Helper()
		if err := os.WriteFile(filepath.Join(sharedHome, name), []byte(body), 0o644); err != nil {
			t.Fatalf(failFmt, err)
		}
	}

	writeShared("config.toml", staleTOML, "seed shared config.toml: %v")
	writeShared("config.json", `{"model":"old-model"}`, "seed shared config.json: %v")
	writeShared("instructions.md", "old instructions", "seed shared instructions.md: %v")
	t.Setenv("CODEX_HOME", sharedHome)

	taskHome := filepath.Join(t.TempDir(), "codex-home")
	if err := prepareCodexHome(taskHome, testLogger()); err != nil {
		t.Fatalf("first prepareCodexHome: %v", err)
	}

	// Between runs the user switches provider and API key in the shared home.
	writeShared("config.toml", freshTOML, "rotate shared config.toml: %v")
	writeShared("config.json", `{"model":"new-model"}`, "rotate shared config.json: %v")
	writeShared("instructions.md", "new instructions", "rotate shared instructions.md: %v")

	// Resume: the very same per-task home goes through prepare again.
	if err := prepareCodexHome(taskHome, testLogger()); err != nil {
		t.Fatalf("second prepareCodexHome (resume): %v", err)
	}

	// readTask loads one file from the per-task home, aborting on failure.
	readTask := func(name, failFmt string) []byte {
		t.Helper()
		raw, err := os.ReadFile(filepath.Join(taskHome, name))
		if err != nil {
			t.Fatalf(failFmt, err)
		}
		return raw
	}

	// config.toml: new provider / URL / env_key in, old ones out.
	tomlText := string(readTask("config.toml", "read per-task config.toml: %v"))
	for _, want := range []string{`model_provider = "new-provider"`, "https://new.example.com", "NEW_API_KEY"} {
		if strings.Contains(tomlText, want) {
			continue
		}
		t.Errorf("per-task config.toml missing %q after refresh, got:\n%s", want, tomlText)
	}
	for _, bad := range []string{"old-provider", "https://old.example.com", "OLD_API_KEY"} {
		if !strings.Contains(tomlText, bad) {
			continue
		}
		t.Errorf("per-task config.toml still contains stale %q after refresh, got:\n%s", bad, tomlText)
	}

	// The daemon-managed sandbox / multi-agent / memory blocks have to be
	// layered back on top of the fresh copy; the fix is only correct if they
	// survive the re-copy.
	managedMarkers := []string{
		multicaManagedBeginMarker,
		multicaMultiAgentBeginMarker,
		multicaMemoryFeatureBeginMarker,
		multicaMemoryConfigBeginMarker,
	}
	for _, marker := range managedMarkers {
		if strings.Contains(tomlText, marker) {
			continue
		}
		t.Errorf("daemon-managed marker %q missing after refresh, got:\n%s", marker, tomlText)
	}

	// config.json: must carry the new model.
	jsonRaw := readTask("config.json", "read per-task config.json: %v")
	if string(jsonRaw) != `{"model":"new-model"}` {
		t.Errorf("per-task config.json content = %q, want refreshed contents", jsonRaw)
	}

	// instructions.md: must carry the new text.
	instructionsRaw := readTask("instructions.md", "read per-task instructions.md: %v")
	if string(instructionsRaw) != "new instructions" {
		t.Errorf("per-task instructions.md content = %q, want refreshed contents", instructionsRaw)
	}
}
// TestPrepareCodexHome_DropsCopiedConfigWhenSharedSourceRemoved is the
// MUL-2646 regression test for the deletion arm. The shared Codex home is
// seeded, the per-task codex-home is prepared once, config.toml / config.json
// / instructions.md are then deleted from the shared home, and the same
// per-task home is prepared again as a session resume would. The per-task
// config.json and instructions.md must be gone afterwards, so resume cannot
// replay files the user already removed. config.toml must still exist,
// because the daemon-managed ensure* passes recreate it, but it may only hold
// the managed sandbox / multi-agent / memory blocks and none of the old
// provider settings.
func TestPrepareCodexHome_DropsCopiedConfigWhenSharedSourceRemoved(t *testing.T) {
	// t.Setenv is used below, which rules out t.Parallel().
	const staleTOML = `model_provider = "old-provider"
[model_providers.old-provider]
name = "Old"
base_url = "https://old.example.com"
env_key = "OLD_API_KEY"
`

	sharedHome := t.TempDir()
	// writeShared writes one file into the shared home and aborts the test
	// with the supplied message format when the write fails.
	writeShared := func(name, body, failFmt string) {
		t.Helper()
		if err := os.WriteFile(filepath.Join(sharedHome, name), []byte(body), 0o644); err != nil {
			t.Fatalf(failFmt, err)
		}
	}

	writeShared("config.toml", staleTOML, "seed shared config.toml: %v")
	writeShared("config.json", `{"model":"old-model"}`, "seed shared config.json: %v")
	writeShared("instructions.md", "old instructions", "seed shared instructions.md: %v")
	t.Setenv("CODEX_HOME", sharedHome)

	taskHome := filepath.Join(t.TempDir(), "codex-home")
	if err := prepareCodexHome(taskHome, testLogger()); err != nil {
		t.Fatalf("first prepareCodexHome: %v", err)
	}

	copiedNames := []string{"config.toml", "config.json", "instructions.md"}

	// Sanity check: the first prepare must have put all three files in place.
	for _, name := range copiedNames {
		if _, err := os.Stat(filepath.Join(taskHome, name)); err != nil {
			t.Fatalf("first prepare did not seed per-task %s: %v", name, err)
		}
	}

	// Between runs the user deletes the shared sources.
	for _, name := range copiedNames {
		if err := os.Remove(filepath.Join(sharedHome, name)); err != nil {
			t.Fatalf("remove shared %s: %v", name, err)
		}
	}

	// Resume: the very same per-task home goes through prepare again.
	if err := prepareCodexHome(taskHome, testLogger()); err != nil {
		t.Fatalf("second prepareCodexHome (resume): %v", err)
	}

	// Nothing daemon-managed backs config.json or instructions.md, so both
	// have to vanish together with their shared source.
	for _, name := range []string{"config.json", "instructions.md"} {
		_, err := os.Stat(filepath.Join(taskHome, name))
		if os.IsNotExist(err) {
			continue
		}
		t.Errorf("per-task %s still exists after shared source removed (stat err = %v)", name, err)
	}

	// config.toml is recreated by the ensure* passes: it must exist, hold
	// only daemon-managed blocks, and carry no old provider/URL/env_key.
	raw, err := os.ReadFile(filepath.Join(taskHome, "config.toml"))
	if err != nil {
		t.Fatalf("read per-task config.toml after shared removal: %v", err)
	}
	tomlText := string(raw)
	for _, bad := range []string{"old-provider", "https://old.example.com", "OLD_API_KEY"} {
		if !strings.Contains(tomlText, bad) {
			continue
		}
		t.Errorf("per-task config.toml still contains stale %q after shared source removed, got:\n%s", bad, tomlText)
	}

	managedMarkers := []string{
		multicaManagedBeginMarker,
		multicaMultiAgentBeginMarker,
		multicaMemoryFeatureBeginMarker,
		multicaMemoryConfigBeginMarker,
	}
	for _, marker := range managedMarkers {
		if strings.Contains(tomlText, marker) {
			continue
		}
		t.Errorf("daemon-managed marker %q missing after shared source removed, got:\n%s", marker, tomlText)
	}
}
func TestEnsureCodexSandboxConfigCreatesDefaultLinux(t *testing.T) {
t.Parallel()
dir := t.TempDir()