From 65b0f329d1b6a9de6091c86a044b98fe6c543330 Mon Sep 17 00:00:00 2001
From: jmorganca
Date: Tue, 11 Mar 2025 11:53:37 +0100
Subject: [PATCH] Revert "Allow models to force a new batch"

This reverts commit c7eae586b899083acebcd9b3847b89ea78c2850c.
---
 model/input/input.go          |  6 ------
 model/models/gemma3/model.go  |  4 ++--
 runner/ollamarunner/runner.go |  2 +-
 server/prompt.go              | 12 +++++++++++-
 4 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/model/input/input.go b/model/input/input.go
index a1247bca7..0cb3f3f41 100644
--- a/model/input/input.go
+++ b/model/input/input.go
@@ -15,12 +15,6 @@ type Input struct {
 	// stored in Multimodal, used for caching and comparing
 	// equality.
 	MultimodalHash uint64
-
-	// BatchBreak forces a new batch to be started with this
-	// input. For example, this can be used to align images
-	// with batches. Note that batches may be divided in additional
-	// locations as well.
-	BatchBreak bool
 }
 
 // MultimodalIndex is a multimodal element (such as an image)
diff --git a/model/models/gemma3/model.go b/model/models/gemma3/model.go
index 2fe04348d..7418bb12f 100644
--- a/model/models/gemma3/model.go
+++ b/model/models/gemma3/model.go
@@ -112,8 +112,8 @@ func (m *Model) PostTokenize(ctx ml.Context, inputs []input.Input) ([]input.Inpu
 			result = append(result, inp)
 		} else {
 			imageInputs := []input.Input{
-				{Token: 108},                      // "\n\n"
-				{Token: 255999, BatchBreak: true}, // "<start_of_image>"
+				{Token: 108},    // "\n\n"
+				{Token: 255999}, // "<start_of_image>"
 			}
 			result = append(result, imageInputs...)
 
diff --git a/runner/ollamarunner/runner.go b/runner/ollamarunner/runner.go
index 9b997bd37..c1475cbb2 100644
--- a/runner/ollamarunner/runner.go
+++ b/runner/ollamarunner/runner.go
@@ -363,7 +363,7 @@ func (s *Server) processBatch() error {
 			}
 		}
 
-		if j >= s.batchSize || (inp.BatchBreak && len(seq.pendingInputs) != 0) {
+		if j >= s.batchSize {
 			break
 		}
 
diff --git a/server/prompt.go b/server/prompt.go
index 5b5b958f1..d053f2a8d 100644
--- a/server/prompt.go
+++ b/server/prompt.go
@@ -26,6 +26,7 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
 	var system []api.Message
 
 	isMllama := checkMllamaModelFamily(m)
+	isGemma3 := checkGemma3ModelFamily(m)
 
 	var imageNumTokens int
 	// TODO: Ideally we would compute this from the projector metadata but some pieces are implementation dependent
@@ -40,7 +41,7 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
 	n := len(msgs) - 1
 	// in reverse, find all messages that fit into context window
 	for i := n; i >= 0; i-- {
-		if isMllama && len(msgs[i].Images) > 1 {
+		if (isMllama || isGemma3) && len(msgs[i].Images) > 1 {
 			return "", nil, errTooManyImages
 		}
 
@@ -157,3 +158,12 @@ func checkMllamaModelFamily(m *Model) bool {
 	}
 	return false
 }
+
+func checkGemma3ModelFamily(m *Model) bool {
+	for _, arch := range m.Config.ModelFamilies {
+		if arch == "gemma3" {
+			return true
+		}
+	}
+	return false
+}
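
Note on the behavioral effect of this revert (commentary, not part of the patch): with BatchBreak gone, processBatch splits batches only when the pending input count reaches batchSize, so a model can no longer force an early split to align an image with the start of a batch. Below is a toy, self-contained sketch of the post-revert packing rule; packBatches is a stand-in written for this note, not ollama's runner, which also tracks per-sequence pending inputs and cache state (requires Go 1.21+ for the min builtin):

package main

import "fmt"

// packBatches mirrors the post-revert rule in runner/ollamarunner/runner.go:
// batch size is the only split point.
func packBatches(inputs []int32, batchSize int) [][]int32 {
	var batches [][]int32
	for len(inputs) > 0 {
		n := min(batchSize, len(inputs))
		batches = append(batches, inputs[:n])
		inputs = inputs[n:]
	}
	return batches
}

func main() {
	// Five inputs with batchSize 2 pack as [1 2] [3 4] [5], regardless of
	// where an image token falls in the stream.
	fmt.Println(packBatches([]int32{1, 2, 3, 4, 5}, 2))
}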
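
The server/prompt.go hunks compensate on the server side: gemma3 now gets the same one-image-per-message limit that mllama already had, with chatPrompt returning errTooManyImages when a message carries more than one image. A minimal sketch of that guard follows, assuming trimmed stand-ins for ollama's server types (Config, Model, and the error text here are illustrative, not the real definitions):

package main

import (
	"errors"
	"fmt"
)

// Stand-in error text; ollama declares errTooManyImages in server/prompt.go.
var errTooManyImages = errors.New("too many images: at most one image per message")

// Config and Model are trimmed stand-ins for ollama's server types; only
// ModelFamilies matters for this check.
type Config struct {
	ModelFamilies []string
}

type Model struct {
	Config Config
}

// checkGemma3ModelFamily matches the helper added in server/prompt.go.
func checkGemma3ModelFamily(m *Model) bool {
	for _, arch := range m.Config.ModelFamilies {
		if arch == "gemma3" {
			return true
		}
	}
	return false
}

// validateImageCounts applies the chatPrompt guard to per-message image
// counts: gemma3 (like mllama) rejects more than one image per message.
func validateImageCounts(m *Model, imagesPerMessage []int) error {
	if !checkGemma3ModelFamily(m) {
		return nil
	}
	for _, n := range imagesPerMessage {
		if n > 1 {
			return errTooManyImages
		}
	}
	return nil
}

func main() {
	m := &Model{Config: Config{ModelFamilies: []string{"gemma3"}}}
	fmt.Println(validateImageCounts(m, []int{1}))    // <nil>
	fmt.Println(validateImageCounts(m, []int{2, 1})) // the error above
}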