From 033cec232a7e6702d6a79ca22ca81200fad5873b Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Wed, 12 Mar 2025 14:18:06 -0700
Subject: [PATCH] count gemma3 vision tensors

---
 fs/ggml/ggml.go | 8 ++++++++
 llm/memory.go   | 4 ++--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go
index d32296d9c..00392b4af 100644
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -611,6 +611,14 @@ func (llm GGML) VisionGraphSize() (weights, graphSize uint64) {
 			embeddingLength*numPatches*maxNumTiles +
 			9*embeddingLength*numPaddedPatches*maxNumTiles +
 			numPaddedPatches*maxNumTiles*numPaddedPatches*maxNumTiles*headCount)
+	case "gemma3":
+		for name, layer := range llm.Tensors().GroupLayers() {
+			if strings.HasPrefix(name, "v.") {
+				for _, tensor := range layer {
+					weights += tensor.Size()
+				}
+			}
+		}
 	}
 	return weights, graphSize
 }
diff --git a/llm/memory.go b/llm/memory.go
index 40104eca9..ac830ee84 100644
--- a/llm/memory.go
+++ b/llm/memory.go
@@ -218,8 +218,8 @@ func EstimateGPULayers(gpus []discover.GpuInfo, f *ggml.GGML, projectors []strin
 		if blk, ok := layers[fmt.Sprintf("blk.%d", i)]; ok {
 			layerSize = blk.Size()
 			layerSize += kv / f.KV().BlockCount()
+			memoryWeights += blk.Size()
 		}
-		memoryWeights += layerSize
 
 		if opts.NumGPU >= 0 && layerCount >= opts.NumGPU {
 			// Stop allocating on GPU(s) once we hit the users target NumGPU
@@ -376,7 +376,7 @@ func (m MemoryEstimate) LogValue() slog.Value {
 				// memory of the weights
 				"total", format.HumanBytes2(m.memoryWeights),
 				// memory of repeating layers
-				"repeating", format.HumanBytes2(m.memoryWeights-m.memoryLayerOutput),
+				"repeating", format.HumanBytes2(m.memoryWeights),
 				// memory of non-repeating layers
 				"nonrepeating", format.HumanBytes2(m.memoryLayerOutput),
 			),