int: adjust a few models for integration tests (#11872)

2025-08-24 22:51:04 +02:00 · 2025-08-13 15:42:36 -07:00
parent dc5a645434
commit a24f90604f
1 changed files with 32 additions and 12 deletions
--- a/integration/concurrency_test.go
+++ b/integration/concurrency_test.go
@@ -4,7 +4,9 @@ package integration

 import (
 	"context"
+	"fmt"
 	"log/slog"
+	"math"
 	"os"
 	"strconv"
 	"sync"
@@ -21,7 +23,7 @@ func TestMultiModelConcurrency(t *testing.T) {
 	var (
 		req = [2]api.GenerateRequest{
 			{
-				Model:     "llama3.2:1b",
+				Model:     smol,
 				Prompt:    "why is the ocean blue?",
 				Stream:    &stream,
 				KeepAlive: &api.Duration{Duration: 10 * time.Second},
@@ -30,7 +32,7 @@ func TestMultiModelConcurrency(t *testing.T) {
 					"temperature": 0.0,
 				},
 			}, {
-				Model:     "tinydolphin",
+				Model:     "qwen3:0.6b",
 				Prompt:    "what is the origin of the us thanksgiving holiday?",
 				Stream:    &stream,
 				KeepAlive: &api.Duration{Duration: 10 * time.Second},
@@ -132,16 +134,16 @@ func TestMultiModelStress(t *testing.T) {
 			size: 2876 * format.MebiByte,
 		},
 		{
-			name: "phi",
-			size: 2616 * format.MebiByte,
+			name: "qwen3:0.6b",
+			size: 1600 * format.MebiByte,
 		},
 		{
 			name: "gemma:2b",
 			size: 2364 * format.MebiByte,
 		},
 		{
-			name: "stable-code:3b",
-			size: 2608 * format.MebiByte,
+			name: "deepseek-r1:1.5b",
+			size: 2048 * format.MebiByte,
 		},
 		{
 			name: "starcoder2:3b",
@@ -149,17 +151,21 @@ func TestMultiModelStress(t *testing.T) {
 		},
 	}
 	mediumModels := []model{
+		{
+			name: "qwen3:8b",
+			size: 6600 * format.MebiByte,
+		},
 		{
 			name: "llama2",
 			size: 5118 * format.MebiByte,
 		},
 		{
-			name: "mistral",
-			size: 4620 * format.MebiByte,
+			name: "deepseek-r1:7b",
+			size: 5600 * format.MebiByte,
 		},
 		{
-			name: "orca-mini:7b",
-			size: 5118 * format.MebiByte,
+			name: "mistral",
+			size: 4620 * format.MebiByte,
 		},
 		{
 			name: "dolphin-mistral",
@@ -254,7 +260,7 @@ func TestMultiModelStress(t *testing.T) {
 	}
 	go func() {
 		for {
-			time.Sleep(2 * time.Second)
+			time.Sleep(10 * time.Second)
 			select {
 			case <-ctx.Done():
 				return
@@ -265,7 +271,21 @@ func TestMultiModelStress(t *testing.T) {
 					continue
 				}
 				for _, m := range models.Models {
-					slog.Info("loaded model snapshot", "model", m)
+					var procStr string
+					switch {
+					case m.SizeVRAM == 0:
+						procStr = "100% CPU"
+					case m.SizeVRAM == m.Size:
+						procStr = "100% GPU"
+					case m.SizeVRAM > m.Size || m.Size == 0:
+						procStr = "Unknown"
+					default:
+						sizeCPU := m.Size - m.SizeVRAM
+						cpuPercent := math.Round(float64(sizeCPU) / float64(m.Size) * 100)
+						procStr = fmt.Sprintf("%d%%/%d%%", int(cpuPercent), int(100-cpuPercent))
+					}
+
+					slog.Info("loaded model snapshot", "model", m.Name, "CPU/GPU", procStr, "expires", format.HumanTime(m.ExpiresAt, "Never"))
 				}
 			}
 		}