int: adjust a few models for integration tests (#11872)

This commit is contained in:
Daniel Hiltgen
2025-08-13 15:42:36 -07:00
committed by GitHub
parent dc5a645434
commit a24f90604f

View File

@@ -4,7 +4,9 @@ package integration
import (
"context"
"fmt"
"log/slog"
"math"
"os"
"strconv"
"sync"
@@ -21,7 +23,7 @@ func TestMultiModelConcurrency(t *testing.T) {
var (
req = [2]api.GenerateRequest{
{
Model: "llama3.2:1b",
Model: smol,
Prompt: "why is the ocean blue?",
Stream: &stream,
KeepAlive: &api.Duration{Duration: 10 * time.Second},
@@ -30,7 +32,7 @@ func TestMultiModelConcurrency(t *testing.T) {
"temperature": 0.0,
},
}, {
Model: "tinydolphin",
Model: "qwen3:0.6b",
Prompt: "what is the origin of the us thanksgiving holiday?",
Stream: &stream,
KeepAlive: &api.Duration{Duration: 10 * time.Second},
@@ -132,16 +134,16 @@ func TestMultiModelStress(t *testing.T) {
size: 2876 * format.MebiByte,
},
{
name: "phi",
size: 2616 * format.MebiByte,
name: "qwen3:0.6b",
size: 1600 * format.MebiByte,
},
{
name: "gemma:2b",
size: 2364 * format.MebiByte,
},
{
name: "stable-code:3b",
size: 2608 * format.MebiByte,
name: "deepseek-r1:1.5b",
size: 2048 * format.MebiByte,
},
{
name: "starcoder2:3b",
@@ -149,17 +151,21 @@ func TestMultiModelStress(t *testing.T) {
},
}
mediumModels := []model{
{
name: "qwen3:8b",
size: 6600 * format.MebiByte,
},
{
name: "llama2",
size: 5118 * format.MebiByte,
},
{
name: "mistral",
size: 4620 * format.MebiByte,
name: "deepseek-r1:7b",
size: 5600 * format.MebiByte,
},
{
name: "orca-mini:7b",
size: 5118 * format.MebiByte,
name: "mistral",
size: 4620 * format.MebiByte,
},
{
name: "dolphin-mistral",
@@ -254,7 +260,7 @@ func TestMultiModelStress(t *testing.T) {
}
go func() {
for {
time.Sleep(2 * time.Second)
time.Sleep(10 * time.Second)
select {
case <-ctx.Done():
return
@@ -265,7 +271,21 @@ func TestMultiModelStress(t *testing.T) {
continue
}
for _, m := range models.Models {
slog.Info("loaded model snapshot", "model", m)
var procStr string
switch {
case m.SizeVRAM == 0:
procStr = "100% CPU"
case m.SizeVRAM == m.Size:
procStr = "100% GPU"
case m.SizeVRAM > m.Size || m.Size == 0:
procStr = "Unknown"
default:
sizeCPU := m.Size - m.SizeVRAM
cpuPercent := math.Round(float64(sizeCPU) / float64(m.Size) * 100)
procStr = fmt.Sprintf("%d%%/%d%%", int(cpuPercent), int(100-cpuPercent))
}
slog.Info("loaded model snapshot", "model", m.Name, "CPU/GPU", procStr, "expires", format.HumanTime(m.ExpiresAt, "Never"))
}
}
}