From 033cec232a7e6702d6a79ca22ca81200fad5873b Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Wed, 12 Mar 2025 14:18:06 -0700
Subject: [PATCH 1/5] count gemma3 vision tensors

---
 fs/ggml/ggml.go | 8 ++++++++
 llm/memory.go   | 4 ++--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go
index d32296d9c..00392b4af 100644
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -611,6 +611,14 @@ func (llm GGML) VisionGraphSize() (weights, graphSize uint64) {
 			embeddingLength*numPatches*maxNumTiles +
 			9*embeddingLength*numPaddedPatches*maxNumTiles +
 			numPaddedPatches*maxNumTiles*numPaddedPatches*maxNumTiles*headCount)
+	case "gemma3":
+		for name, layer := range llm.Tensors().GroupLayers() {
+			if strings.HasPrefix(name, "v.") {
+				for _, tensor := range layer {
+					weights += tensor.Size()
+				}
+			}
+		}
 	}
 	return weights, graphSize
 }
diff --git a/llm/memory.go b/llm/memory.go
index 40104eca9..ac830ee84 100644
--- a/llm/memory.go
+++ b/llm/memory.go
@@ -218,8 +218,8 @@ func EstimateGPULayers(gpus []discover.GpuInfo, f *ggml.GGML, projectors []strin
 		if blk, ok := layers[fmt.Sprintf("blk.%d", i)]; ok {
 			layerSize = blk.Size()
 			layerSize += kv / f.KV().BlockCount()
+			memoryWeights += blk.Size()
 		}
-		memoryWeights += layerSize
 
 		if opts.NumGPU >= 0 && layerCount >= opts.NumGPU {
 			// Stop allocating on GPU(s) once we hit the users target NumGPU
@@ -376,7 +376,7 @@ func (m MemoryEstimate) LogValue() slog.Value {
 				// memory of the weights
 				"total", format.HumanBytes2(m.memoryWeights),
 				// memory of repeating layers
-				"repeating", format.HumanBytes2(m.memoryWeights-m.memoryLayerOutput),
+				"repeating", format.HumanBytes2(m.memoryWeights),
 				// memory of non-repeating layers
 				"nonrepeating", format.HumanBytes2(m.memoryLayerOutput),
 			),

From d2ec22371edba325903588b9515dd94b15b80d76 Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Wed, 12 Mar 2025 16:08:24 -0700
Subject: [PATCH 2/5] count all vision tensors

---
 fs/ggml/ggml.go | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go
index 00392b4af..da3ee0a79 100644
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -579,12 +579,16 @@ func (f GGML) GraphSize(context, batch uint64, kvCacheType string) (kv, partialO
 }
 
 func (llm GGML) VisionGraphSize() (weights, graphSize uint64) {
+	for name, layer := range llm.Tensors().GroupLayers() {
+		if strings.HasPrefix(name, "v.") {
+			for _, tensor := range layer {
+				weights += tensor.Size()
+			}
+		}
+	}
+
 	switch llm.KV().Architecture() {
 	case "mllama":
-		for _, layer := range llm.Tensors().GroupLayers()["v"] {
-			weights += layer.Size()
-		}
-
 		kv := func(n string) uint64 {
 			if v, ok := llm.KV()["mllama.vision."+n].(uint32); ok {
 				return uint64(v)
@@ -611,15 +615,8 @@ func (llm GGML) VisionGraphSize() (weights, graphSize uint64) {
 			embeddingLength*numPatches*maxNumTiles +
 			9*embeddingLength*numPaddedPatches*maxNumTiles +
 			numPaddedPatches*maxNumTiles*numPaddedPatches*maxNumTiles*headCount)
-	case "gemma3":
-		for name, layer := range llm.Tensors().GroupLayers() {
-			if strings.HasPrefix(name, "v.") {
-				for _, tensor := range layer {
-					weights += tensor.Size()
-				}
-			}
-		}
 	}
+
 	return weights, graphSize
 }
 

From a422ba39c94adc870da84e5fa442c0bf81c77f27 Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Thu, 13 Mar 2025 14:41:57 -0700
Subject: [PATCH 3/5] roughly count gemma3 graph

the largest operation by far is the attention product (q @ k), so count
only that term for simplicity
---
 fs/ggml/ggml.go | 36 +++++++++++++++++-------------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go
index da3ee0a79..be1dffe0d 100644
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -587,34 +587,32 @@ func (llm GGML) VisionGraphSize() (weights, graphSize uint64) {
 		}
 	}
 
+	imageSize := uint64(llm.KV().Uint("vision.image_size"))
+	patchSize := uint64(llm.KV().Uint("vision.patch_size"))
+
+	numPatches := (imageSize / patchSize) * (imageSize / patchSize)
+	if _, ok := llm.Tensors().GroupLayers()["v"]["class_embd"]; ok {
+		numPatches++
+	}
+
+	headCount := uint64(llm.KV().Uint("vision.attention.head_count"))
+
 	switch llm.KV().Architecture() {
 	case "mllama":
-		kv := func(n string) uint64 {
-			if v, ok := llm.KV()["mllama.vision."+n].(uint32); ok {
-				return uint64(v)
-			}
-
-			return 0
-		}
-
-		imageSize := kv("image_size")
-
-		maxNumTiles := kv("max_num_tiles")
-		embeddingLength := kv("embedding_length")
-		headCount := kv("attention.head_count")
-
-		numPatches := (imageSize / kv("patch_size")) * (imageSize / kv("patch_size"))
-		if _, ok := llm.Tensors().GroupLayers()["v"]["class_embd"]; ok {
-			numPatches++
-		}
 
 		numPaddedPatches := numPatches + 8 - (numPatches%8)%8
 
+		maxNumTiles := uint64(llm.KV().Uint("vision.max_num_tiles"))
+		numChannels := uint64(llm.KV().Uint("vision.num_channels"))
+		embeddingLength := uint64(llm.KV().Uint("vision.embedding_length"))
+
 		graphSize = 4 * (8 +
-			imageSize*imageSize*kv("num_channels")*maxNumTiles +
+			imageSize*imageSize*numChannels*maxNumTiles +
 			embeddingLength*numPatches*maxNumTiles +
 			9*embeddingLength*numPaddedPatches*maxNumTiles +
 			numPaddedPatches*maxNumTiles*numPaddedPatches*maxNumTiles*headCount)
+	case "gemma3":
+		graphSize = 4 * (numPatches * numPatches * headCount)
 	}
 
 	return weights, graphSize

From 65b88c544f08ce3e5b1d193e82b72735095f795c Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Thu, 13 Mar 2025 15:05:42 -0700
Subject: [PATCH 4/5] fix divide by zero

---
 fs/ggml/ggml.go | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go
index be1dffe0d..2c04559f2 100644
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -579,6 +579,10 @@ func (f GGML) GraphSize(context, batch uint64, kvCacheType string) (kv, partialO
 }
 
 func (llm GGML) VisionGraphSize() (weights, graphSize uint64) {
+	if llm.KV().Uint("vision.block_count") == 0 {
+		return
+	}
+
 	for name, layer := range llm.Tensors().GroupLayers() {
 		if strings.HasPrefix(name, "v.") {
 			for _, tensor := range layer {
@@ -589,6 +593,12 @@ func (llm GGML) VisionGraphSize() (weights, graphSize uint64) {
 
 	imageSize := uint64(llm.KV().Uint("vision.image_size"))
 	patchSize := uint64(llm.KV().Uint("vision.patch_size"))
+	if patchSize == 0 {
+		slog.Warn("unknown patch size for vision model")
+		return
+	}
+
+	numChannels := uint64(llm.KV().Uint("vision.num_channels"))
 
 	numPatches := (imageSize / patchSize) * (imageSize / patchSize)
 	if _, ok := llm.Tensors().GroupLayers()["v"]["class_embd"]; ok {
@@ -596,15 +606,13 @@ func (llm GGML) VisionGraphSize() (weights, graphSize uint64) {
 	}
 
 	headCount := uint64(llm.KV().Uint("vision.attention.head_count"))
+	embeddingLength := uint64(llm.KV().Uint("vision.embedding_length"))
 
 	switch llm.KV().Architecture() {
 	case "mllama":
-
 		numPaddedPatches := numPatches + 8 - (numPatches%8)%8
 
 		maxNumTiles := uint64(llm.KV().Uint("vision.max_num_tiles"))
-		numChannels := uint64(llm.KV().Uint("vision.num_channels"))
-		embeddingLength := uint64(llm.KV().Uint("vision.embedding_length"))
 
 		graphSize = 4 * (8 +
 			imageSize*imageSize*numChannels*maxNumTiles +
@@ -612,7 +620,9 @@ func (llm GGML) VisionGraphSize() (weights, graphSize uint64) {
 			9*embeddingLength*numPaddedPatches*maxNumTiles +
 			numPaddedPatches*maxNumTiles*numPaddedPatches*maxNumTiles*headCount)
 	case "gemma3":
-		graphSize = 4 * (numPatches * numPatches * headCount)
+		graphSize = 4 * (imageSize*imageSize*numChannels +
+			embeddingLength*patchSize +
+			numPatches*numPatches*headCount)
 	}
 
 	return weights, graphSize

From 8d76fa23ef058d98cb66ad9c3ecc5fff6d380ca1 Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Thu, 13 Mar 2025 16:53:22 -0700
Subject: [PATCH 5/5] count non-repeating vision layers

---
 fs/ggml/ggml.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go
index 2c04559f2..dd84380b5 100644
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -584,7 +584,7 @@ func (llm GGML) VisionGraphSize() (weights, graphSize uint64) {
 	}
 
 	for name, layer := range llm.Tensors().GroupLayers() {
-		if strings.HasPrefix(name, "v.") {
+		if name == "v" || strings.HasPrefix(name, "v.") {
 			for _, tensor := range layer {
 				weights += tensor.Size()
 			}