mirror of
https://github.com/ollama/ollama.git
synced 2025-11-10 19:48:14 +01:00
tests: reduce stress on CPU to 2 models (#12161)
* tests: reduce stress on CPU to 2 models
  This should avoid flakes due to systems getting overloaded with 3 (or more) models running concurrently.
* tests: allow slow systems to pass on timeout
  If a slow system is still streaming a response, and the response will pass validation, don't fail just because the system is slow.
* test: unload embedding models more quickly
This commit is contained in:
@@ -38,8 +38,9 @@ func TestAllMiniLMEmbeddings(t *testing.T) {
|
||||
defer cleanup()
|
||||
|
||||
req := api.EmbeddingRequest{
|
||||
Model: "all-minilm",
|
||||
Prompt: "why is the sky blue?",
|
||||
Model: "all-minilm",
|
||||
Prompt: "why is the sky blue?",
|
||||
KeepAlive: &api.Duration{Duration: 10 * time.Second},
|
||||
}
|
||||
|
||||
res, err := embeddingTestHelper(ctx, client, t, req)
|
||||
|
||||
Reference in New Issue
Block a user