Integration test tuning (#12492)

Remove some flaky scenarios, and switch to chat for better reliability
Author: Daniel Hiltgen, 2025-10-08 09:51:25 -07:00 (committed by GitHub)
Parent: 303be9304c
Commit: 4e5d862ec4
9 changed files with 195 additions and 90 deletions
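
The ChatRequests/DoChat helpers this commit switches to live elsewhere in the integration package and are not part of this diff. As a rough, self-contained sketch of what a chat-style request and a tolerant response check look like with the ollama Go client (the model name, prompt, and expected terms below are illustrative, not the repo's actual fixtures):

package main

import (
	"context"
	"fmt"
	"log"
	"strings"

	"github.com/ollama/ollama/api"
)

func main() {
	// Client for a local ollama server (honors OLLAMA_HOST, defaults to
	// http://127.0.0.1:11434).
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	req := &api.ChatRequest{
		Model: "llama3.2", // illustrative; the tests pick their own models
		Messages: []api.Message{
			{Role: "user", Content: "why is the sky blue?"},
		},
	}

	// Chat streams by default; the callback fires once per chunk, so the
	// response is accumulated across calls.
	var sb strings.Builder
	err = client.Chat(context.Background(), req, func(resp api.ChatResponse) error {
		sb.WriteString(resp.Message.Content)
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}

	// A loose check in the spirit of the integration helpers: accept any
	// of several expected substrings rather than an exact completion.
	got := strings.ToLower(sb.String())
	for _, want := range []string{"rayleigh", "scatter"} {
		if strings.Contains(got, want) {
			fmt.Println("response contained expected term:", want)
			return
		}
	}
	log.Fatalf("no expected term found in: %q", got)
}

Keeping assertions loose like this, matching any of several plausible terms instead of an exact generated string, is one plausible reason chat-based checks are less flaky than exact generate comparisons.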


@@ -20,9 +20,9 @@ import (
 )
 
 // Send multiple requests in parallel (concurrently) to a single model and ensure responses are expected
-func TestConcurrentGenerate(t *testing.T) {
+func TestConcurrentChat(t *testing.T) {
 	// Assumes all requests have the same model
-	req, resp := GenerateRequests()
+	req, resp := ChatRequests()
 	numParallel := int(envconfig.NumParallel() + 1)
 	iterLimit := 3
 
@@ -57,7 +57,7 @@ func TestConcurrentGenerate(t *testing.T) {
 				slog.Info("Starting", "thread", i, "iter", j)
 				// On slower GPUs it can take a while to process the concurrent requests
 				// so we allow a much longer initial timeout
-				DoGenerate(ctx, t, client, req[k], resp[k], 120*time.Second, 20*time.Second)
+				DoChat(ctx, t, client, req[k], resp[k], 120*time.Second, 20*time.Second)
 			}
 		}(i)
 	}
@@ -163,7 +163,7 @@ chooseModels:
 		wg.Add(1)
 		go func(i int) {
 			defer wg.Done()
-			reqs, resps := GenerateRequests()
+			reqs, resps := ChatRequests()
 			for j := 0; j < 3; j++ {
 				if time.Now().Sub(started) > softTimeout {
 					slog.Info("exceeded soft timeout, winding down test")
@@ -171,8 +171,8 @@ chooseModels:
 				}
 				k := r.Int() % len(reqs)
 				reqs[k].Model = chosenModels[i]
-				slog.Info("Starting", "model", reqs[k].Model, "iteration", j, "request", reqs[k].Prompt)
-				DoGenerate(ctx, t, client, reqs[k], resps[k],
+				slog.Info("Starting", "model", reqs[k].Model, "iteration", j, "request", reqs[k].Messages[0].Content)
+				DoChat(ctx, t, client, reqs[k], resps[k],
 					120*time.Second, // Be extra patient for the model to load initially
 					10*time.Second,  // Once results start streaming, fail if they stall
 				)
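
The multi-model hunks above follow a common stress-test shape: one goroutine per chosen model, a bounded iteration count, and a soft wall-clock budget that winds workers down cleanly instead of failing the run. Distilled as a hypothetical helper (not code from the repo):

package integration

import (
	"log/slog"
	"sync"
	"time"
)

// runWorkers is a hypothetical distillation of the pattern above: numWorkers
// goroutines each run up to iterLimit iterations, but every iteration first
// checks a soft wall-clock budget and returns cleanly once it is spent.
func runWorkers(numWorkers, iterLimit int, softTimeout time.Duration, work func(worker, iter int)) {
	started := time.Now()
	var wg sync.WaitGroup
	for i := 0; i < numWorkers; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			for j := 0; j < iterLimit; j++ {
				// The diff spells this time.Now().Sub(started);
				// time.Since is the idiomatic equivalent.
				if time.Since(started) > softTimeout {
					slog.Info("exceeded soft timeout, winding down test")
					return
				}
				work(i, j)
			}
		}(i)
	}
	wg.Wait()
}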