Mirror of https://github.com/ollama/ollama.git
review comments and coverage
@@ -18,7 +18,7 @@ func TestEstimateGPULayers(t *testing.T) {
 	envconfig.Debug = true
 	modelName := "dummy"
 	f, err := os.CreateTemp(t.TempDir(), modelName)
-	assert.Nil(t, err)
+	require.NoError(t, err)
 	defer f.Close()
 	gguf := NewGGUFV3(binary.LittleEndian)
 	inputLayerCount := 5
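For context (not part of the commit): the first hunk swaps testify's assert.Nil for require.NoError when creating the temp file. assert records the failure and lets the test keep running, while require calls t.FailNow and stops it, so the rest of the test never touches an invalid file handle. A minimal sketch of that behaviour, with an illustrative package and test name:

package sketch

import (
	"os"
	"testing"

	"github.com/stretchr/testify/require"
)

func TestTempFileSketch(t *testing.T) {
	f, err := os.CreateTemp(t.TempDir(), "dummy")
	// require.NoError aborts the test here on failure; assert.Nil would only
	// record the failure and let the code below run with a broken file handle.
	require.NoError(t, err)
	defer f.Close()
}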
@@ -30,7 +30,7 @@ func TestEstimateGPULayers(t *testing.T) {
 		{Name: "blk.4.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: &bytes.Reader{}},
 		{Name: "output.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: &bytes.Reader{}},
 	}
-	assert.Equal(t, inputLayerCount+1, len(tensors))
+	assert.Len(t, tensors, inputLayerCount+1)
 	err = gguf.Encode(f, KV{
 		"general.architecture": "llama",
 		"general.name": "name",
@@ -56,9 +56,11 @@ func TestEstimateGPULayers(t *testing.T) {
 	}
 	projectors := []string{}
 	opts := api.DefaultOptions()
-	estimate := EstimateGPULayers(gpus, ggml, projectors, opts)
-	assert.Equal(t, 0, estimate.Layers)
-	assert.Equal(t, uint64(0), estimate.Graph)
+	t.Run("cpu", func(t *testing.T) {
+		estimate := EstimateGPULayers(gpus, ggml, projectors, opts)
+		assert.Equal(t, 0, estimate.Layers)
+		assert.Equal(t, uint64(0), estimate.Graph)
+	})
 
 	// derived from the dummy ggml file above
 	graphPartialOffload := uint64(202377216)
@@ -80,7 +82,10 @@ func TestEstimateGPULayers(t *testing.T) {
 		},
 	}
-	// Nested array: GPU0 layer space, GPU1 layer space, expected gpu0, expected gpu1
-	for i, s := range [][]uint64{
+	for i, s := range []struct {
+		layer0, layer1   uint64
+		expect0, expect1 uint64
+	}{
 		{1, 1, 1, 1},
 		{2, 1, 2, 1},
 		{2, 2, 2, 2},
@@ -90,27 +95,33 @@ func TestEstimateGPULayers(t *testing.T) {
 		{6, 6, 3, 3},
 		{0, 3, 0, 3},
 	} {
-		gpus[0].FreeMemory = 0
-		gpus[1].FreeMemory = 0
-		gpus[0].FreeMemory += projectorSize + memoryLayerOutput
-		gpus[0].FreeMemory += gpuMinimumMemory + layerSize + s[0]*layerSize + 1
-		gpus[1].FreeMemory += gpuMinimumMemory + layerSize + s[1]*layerSize + 1
-		gpus[0].FreeMemory += max(graphFullOffload, graphPartialOffload)
-		gpus[1].FreeMemory += max(graphFullOffload, graphPartialOffload)
-		estimate = EstimateGPULayers(gpus, ggml, projectors, opts)
-		assert.Equal(t, int(s[2]+s[3]), estimate.Layers, "scenario %d: %v", i, s)
-		assert.Equal(t, fmt.Sprintf("%d,%d", s[2], s[3]), estimate.TensorSplit, "scenario %d: %v", i, s)
-		var layerSums uint64
-		for _, b := range estimate.GPUSizes {
-			layerSums += b
-		}
-		if estimate.Layers < inputLayerCount+1 {
-			assert.Less(t, estimate.VRAMSize, estimate.TotalSize, "scenario %d: %v %+v", i, s, estimate)
-			assert.Equal(t, estimate.VRAMSize, layerSums, "scenario %d: %v %+v", i, s, estimate)
-		} else {
-			assert.Equal(t, estimate.VRAMSize, estimate.TotalSize, "scenario %d: %v %+v", i, s, estimate)
-			assert.Equal(t, estimate.TotalSize, layerSums, "scenario %d: %v %+v", i, s, estimate)
-		}
+		t.Run(fmt.Sprintf("%v", s), func(t *testing.T) {
+			gpus[0].FreeMemory = 0
+			gpus[1].FreeMemory = 0
+			gpus[0].FreeMemory += projectorSize
+			if s.layer0 > 0 {
+				gpus[0].FreeMemory += memoryLayerOutput
+			} else {
+				gpus[1].FreeMemory += memoryLayerOutput
+			}
+			gpus[0].FreeMemory += gpuMinimumMemory + layerSize + s.layer0*layerSize + 1
+			gpus[1].FreeMemory += gpuMinimumMemory + layerSize + s.layer1*layerSize + 1
+			gpus[0].FreeMemory += max(graphFullOffload, graphPartialOffload)
+			gpus[1].FreeMemory += max(graphFullOffload, graphPartialOffload)
+			estimate := EstimateGPULayers(gpus, ggml, projectors, opts)
+			assert.Equal(t, int(s.expect0+s.expect1), estimate.Layers, "scenario %d: %v", i, s)
+			assert.Equal(t, fmt.Sprintf("%d,%d", s.expect0, s.expect1), estimate.TensorSplit, "scenario %d: %v", i, s)
+			var layerSums uint64
+			for _, b := range estimate.GPUSizes {
+				layerSums += b
+			}
+			if estimate.Layers < inputLayerCount+1 {
+				assert.Less(t, estimate.VRAMSize, estimate.TotalSize, "scenario %d: %v %+v", i, s, estimate)
+				assert.Equal(t, estimate.VRAMSize, layerSums, "scenario %d: %v %+v", i, s, estimate)
+			} else {
+				assert.Equal(t, estimate.VRAMSize, estimate.TotalSize, "scenario %d: %v %+v", i, s, estimate)
+				assert.Equal(t, estimate.TotalSize, layerSums, "scenario %d: %v %+v", i, s, estimate)
+			}
+		})
 	}
 
 }
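Not part of the commit, but a summary of the pattern the last two hunks adopt: the positional [][]uint64 table becomes a struct with named fields, and each scenario runs inside t.Run, so positional access like s[2] becomes s.expect0 and each failing case is reported under its own subtest name. A minimal, self-contained sketch under illustrative names (the real test lives in the llm package and calls EstimateGPULayers):

package sketch

import (
	"fmt"
	"testing"
)

func TestSplitSketch(t *testing.T) {
	for i, s := range []struct {
		layer0, layer1   uint64 // layer capacity offered to each GPU
		expect0, expect1 uint64 // layers expected to land on each GPU
	}{
		{1, 1, 1, 1},
		{2, 1, 2, 1},
		{6, 6, 3, 3},
	} {
		// Each table entry becomes its own named subtest, so one failing
		// scenario is reported (and can be re-run) on its own.
		t.Run(fmt.Sprintf("%v", s), func(t *testing.T) {
			if got := s.expect0 + s.expect1; got > s.layer0+s.layer1 {
				t.Errorf("scenario %d: expected %d,%d exceeds capacity %+v", i, s.expect0, s.expect1, s)
			}
		})
	}
}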