diff --git a/server/pkg/agent/models.go b/server/pkg/agent/models.go index faded3e5d..94f2aa788 100644 --- a/server/pkg/agent/models.go +++ b/server/pkg/agent/models.go @@ -182,6 +182,15 @@ func cachedDiscovery(key string, fn func() ([]Model, error)) ([]Model, error) { return nil, err } + // Don't cache an empty result. Zero models is almost always a transient + // failure (discovery CLI timeout, not-logged-in, network blip) rather than + // a runtime that genuinely has no models; caching it would keep the picker + // blank for the full TTL even after the cause clears. Skipping the cache + // lets the next request retry immediately. See #3729. + if len(models) == 0 { + return models, nil + } + modelCacheMu.Lock() modelCache[key] = modelCacheEntry{models: models, expiresAt: time.Now().Add(modelCacheTTL)} modelCacheMu.Unlock() @@ -563,7 +572,13 @@ func discoverPiModels(ctx context.Context, executablePath string) ([]Model, erro if _, err := exec.LookPath(executablePath); err != nil { return []Model{}, nil } - runCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + // Newer pi fetches its catalog from each configured provider over the + // network, so discovery time scales with provider count — a multi-provider + // setup measured ~4.6-4.8s, right at the old 5s cap. When jitter pushed it + // over, the daemon killed the command before it printed anything and the + // model picker came back empty while the runtime stayed online. 15s matches + // the opencode discovery cap (see #3729, same class as #3627). + runCtx, cancel := context.WithTimeout(ctx, 15*time.Second) defer cancel() cmd := exec.CommandContext(runCtx, executablePath, "--list-models") hideAgentWindow(cmd) @@ -979,7 +994,10 @@ func discoverCursorModels(ctx context.Context, executablePath string) ([]Model, if _, err := exec.LookPath(executablePath); err != nil { return cursorStaticModels(), nil } - runCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + // 15s to match the other network-backed discovery paths (pi/opencode/ACP); + // cursor-agent fetches its frequently-changing catalog, so a tight cap can + // time out and fall back to the minimal static list. See #3729. + runCtx, cancel := context.WithTimeout(ctx, 15*time.Second) defer cancel() cmd := exec.CommandContext(runCtx, executablePath, "--list-models") hideAgentWindow(cmd) diff --git a/server/pkg/agent/models_test.go b/server/pkg/agent/models_test.go index 47c12dce1..a3fe4b99b 100644 --- a/server/pkg/agent/models_test.go +++ b/server/pkg/agent/models_test.go @@ -407,6 +407,57 @@ exit 1 } } +// TestCachedDiscoveryDoesNotCacheEmpty verifies that an empty discovery result +// is not cached, so a transient failure (e.g. a `pi --list-models` timeout) +// doesn't keep the model picker blank for the full TTL. A non-empty result is +// still cached. See #3729. +func TestCachedDiscoveryDoesNotCacheEmpty(t *testing.T) { + const emptyKey, nonEmptyKey = "test-cache-empty", "test-cache-nonempty" + // modelCache is a package-level global; clear our keys up front and on + // cleanup so the test stays hermetic under `go test -count=N` (a leftover + // non-empty entry from a prior run would otherwise skip the callback). + resetCache := func() { + modelCacheMu.Lock() + delete(modelCache, emptyKey) + delete(modelCache, nonEmptyKey) + modelCacheMu.Unlock() + } + resetCache() + t.Cleanup(resetCache) + + emptyCalls := 0 + empty := func() ([]Model, error) { + emptyCalls++ + return []Model{}, nil + } + for i := 0; i < 2; i++ { + got, err := cachedDiscovery(emptyKey, empty) + if err != nil { + t.Fatalf("cachedDiscovery: %v", err) + } + if len(got) != 0 { + t.Fatalf("expected empty result, got %+v", got) + } + } + if emptyCalls != 2 { + t.Fatalf("empty result must not be cached: expected fn called 2x, got %d", emptyCalls) + } + + nonEmptyCalls := 0 + nonEmpty := func() ([]Model, error) { + nonEmptyCalls++ + return []Model{{ID: "provider/model"}}, nil + } + for i := 0; i < 2; i++ { + if _, err := cachedDiscovery(nonEmptyKey, nonEmpty); err != nil { + t.Fatalf("cachedDiscovery: %v", err) + } + } + if nonEmptyCalls != 1 { + t.Fatalf("non-empty result must be cached: expected fn called 1x, got %d", nonEmptyCalls) + } +} + func TestParsePiModels(t *testing.T) { input := `openai:gpt-4o anthropic:claude-opus-4-7