diff --git a/server/pkg/agent/models.go b/server/pkg/agent/models.go index faded3e5d..ef9e4f6ca 100644 --- a/server/pkg/agent/models.go +++ b/server/pkg/agent/models.go @@ -182,6 +182,15 @@ func cachedDiscovery(key string, fn func() ([]Model, error)) ([]Model, error) { return nil, err } + // Don't cache an empty result. Zero models is almost always a transient + // failure (discovery CLI timeout, not-logged-in, network blip) rather than + // a runtime that genuinely has no models; caching it would keep the picker + // blank for the full TTL even after the cause clears. Skipping the cache + // lets the next request retry immediately. See #3729. + if len(models) == 0 { + return models, nil + } + modelCacheMu.Lock() modelCache[key] = modelCacheEntry{models: models, expiresAt: time.Now().Add(modelCacheTTL)} modelCacheMu.Unlock() @@ -563,7 +572,13 @@ func discoverPiModels(ctx context.Context, executablePath string) ([]Model, erro if _, err := exec.LookPath(executablePath); err != nil { return []Model{}, nil } - runCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + // Newer pi fetches its catalog from each configured provider over the + // network, so discovery time scales with provider count — a multi-provider + // setup measured ~4.6-4.8s, right at the old 5s cap. When jitter pushed it + // over, the daemon killed the command before it printed anything and the + // model picker came back empty while the runtime stayed online. 15s matches + // the opencode discovery cap (see #3729, same class as #3627). + runCtx, cancel := context.WithTimeout(ctx, 15*time.Second) defer cancel() cmd := exec.CommandContext(runCtx, executablePath, "--list-models") hideAgentWindow(cmd) diff --git a/server/pkg/agent/models_test.go b/server/pkg/agent/models_test.go index 47c12dce1..c1123a81d 100644 --- a/server/pkg/agent/models_test.go +++ b/server/pkg/agent/models_test.go @@ -407,6 +407,44 @@ exit 1 } } +// TestCachedDiscoveryDoesNotCacheEmpty verifies that an empty discovery result +// is not cached, so a transient failure (e.g. a `pi --list-models` timeout) +// doesn't keep the model picker blank for the full TTL. A non-empty result is +// still cached. See #3729. +func TestCachedDiscoveryDoesNotCacheEmpty(t *testing.T) { + emptyCalls := 0 + empty := func() ([]Model, error) { + emptyCalls++ + return []Model{}, nil + } + for i := 0; i < 2; i++ { + got, err := cachedDiscovery("test-cache-empty", empty) + if err != nil { + t.Fatalf("cachedDiscovery: %v", err) + } + if len(got) != 0 { + t.Fatalf("expected empty result, got %+v", got) + } + } + if emptyCalls != 2 { + t.Fatalf("empty result must not be cached: expected fn called 2x, got %d", emptyCalls) + } + + nonEmptyCalls := 0 + nonEmpty := func() ([]Model, error) { + nonEmptyCalls++ + return []Model{{ID: "provider/model"}}, nil + } + for i := 0; i < 2; i++ { + if _, err := cachedDiscovery("test-cache-nonempty", nonEmpty); err != nil { + t.Fatalf("cachedDiscovery: %v", err) + } + } + if nonEmptyCalls != 1 { + t.Fatalf("non-empty result must be cached: expected fn called 1x, got %d", nonEmptyCalls) + } +} + func TestParsePiModels(t *testing.T) { input := `openai:gpt-4o anthropic:claude-opus-4-7