mirror of
https://github.com/ollama/ollama.git
synced 2025-11-11 05:07:57 +01:00
test: harden scheduler tests (#12662)
* test: harden scheduler tests

  This removes reschedDelay, which was stale code, and adds a new configurable timeout for waitForVRAMRecovery, so tests can now set the timeout to be very short to avoid the scheduler getting stuck and hitting a test timeout.

* test: tune tests for partial loads

  Give stress tests more time when the model is split between CPU/GPU.
This commit is contained in:
@@ -109,6 +109,8 @@ func TestMultiModelStress(t *testing.T) {
|
||||
defer cancel()
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
initialTimeout := 120 * time.Second
|
||||
streamTimeout := 20 * time.Second
|
||||
|
||||
// Make sure all the models are pulled before we get started
|
||||
for _, model := range chosenModels {
|
||||
@@ -147,6 +149,8 @@ chooseModels:
|
||||
for _, m := range models.Models {
|
||||
if m.SizeVRAM == 0 {
|
||||
slog.Info("model running on CPU", "name", m.Name, "target", targetLoadCount, "chosen", chosenModels[:targetLoadCount])
|
||||
initialTimeout = 240 * time.Second
|
||||
streamTimeout = 30 * time.Second
|
||||
break chooseModels
|
||||
}
|
||||
}
|
||||
@@ -172,10 +176,7 @@ chooseModels:
|
||||
k := r.Int() % len(reqs)
|
||||
reqs[k].Model = chosenModels[i]
|
||||
slog.Info("Starting", "model", reqs[k].Model, "iteration", j, "request", reqs[k].Messages[0].Content)
|
||||
DoChat(ctx, t, client, reqs[k], resps[k],
|
||||
120*time.Second, // Be extra patient for the model to load initially
|
||||
10*time.Second, // Once results start streaming, fail if they stall
|
||||
)
|
||||
DoChat(ctx, t, client, reqs[k], resps[k], initialTimeout, streamTimeout)
|
||||
}
|
||||
}(i)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user