From 033cec232a7e6702d6a79ca22ca81200fad5873b Mon Sep 17 00:00:00 2001
From: Michael Yang
Date: Wed, 12 Mar 2025 14:18:06 -0700
Subject: [PATCH] count gemma3 vision tensors

---
 fs/ggml/ggml.go | 8 ++++++++
 llm/memory.go   | 4 ++--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go
index d32296d9c..00392b4af 100644
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -611,6 +611,14 @@ func (llm GGML) VisionGraphSize() (weights, graphSize uint64) {
 			embeddingLength*numPatches*maxNumTiles +
 			9*embeddingLength*numPaddedPatches*maxNumTiles +
 			numPaddedPatches*maxNumTiles*numPaddedPatches*maxNumTiles*headCount)
+	case "gemma3":
+		for name, layer := range llm.Tensors().GroupLayers() {
+			if strings.HasPrefix(name, "v.") {
+				for _, tensor := range layer {
+					weights += tensor.Size()
+				}
+			}
+		}
 	}
 	return weights, graphSize
 }
diff --git a/llm/memory.go b/llm/memory.go
index 40104eca9..ac830ee84 100644
--- a/llm/memory.go
+++ b/llm/memory.go
@@ -218,8 +218,8 @@ func EstimateGPULayers(gpus []discover.GpuInfo, f *ggml.GGML, projectors []strin
 		if blk, ok := layers[fmt.Sprintf("blk.%d", i)]; ok {
 			layerSize = blk.Size()
 			layerSize += kv / f.KV().BlockCount()
+			memoryWeights += blk.Size()
 		}
-		memoryWeights += layerSize
 
 		if opts.NumGPU >= 0 && layerCount >= opts.NumGPU {
 			// Stop allocating on GPU(s) once we hit the users target NumGPU
@@ -376,7 +376,7 @@ func (m MemoryEstimate) LogValue() slog.Value {
 				// memory of the weights
 				"total", format.HumanBytes2(m.memoryWeights),
 				// memory of repeating layers
-				"repeating", format.HumanBytes2(m.memoryWeights-m.memoryLayerOutput),
+				"repeating", format.HumanBytes2(m.memoryWeights),
 				// memory of non-repeating layers
 				"nonrepeating", format.HumanBytes2(m.memoryLayerOutput),
 			),