tests: reduce stress on CPU to 2 models (#12161)

* tests: reduce stress on CPU to 2 models

  This should avoid flakes due to systems getting overloaded with 3 (or more) models running concurrently.

* tests: allow slow systems to pass on timeout

  If a slow system is still streaming a response, and the response will pass validation, don't fail just because the system is slow.

* test: unload embedding models more quickly
2025-11-10 19:48:14 +01:00 · 2025-09-09 09:32:15 -07:00
parent f810ec741c
commit 6745182885
3 changed files with 55 additions and 30 deletions
--- a/integration/embed_test.go
+++ b/integration/embed_test.go
@@ -38,8 +38,9 @@ func TestAllMiniLMEmbeddings(t *testing.T) {
 	defer cleanup()

 	req := api.EmbeddingRequest{
-		Model:  "all-minilm",
-		Prompt: "why is the sky blue?",
+		Model:     "all-minilm",
+		Prompt:    "why is the sky blue?",
+		KeepAlive: &api.Duration{Duration: 10 * time.Second},
 	}

 	res, err := embeddingTestHelper(ctx, client, t, req)