From aba157531521192a04d09811fac3cda20e1a8340 Mon Sep 17 00:00:00 2001
From: Jesse Gross
Date: Wed, 10 Sep 2025 11:03:06 -0700
Subject: [PATCH] llm: Don't try to load split vision models in the Ollama
 engine

If a model with a split vision projector is loaded in the Ollama
engine, the projector will be ignored and the model will hallucinate
a response. Instead, fall back and try to load the model in the llama
engine.
---
 llm/server.go | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/llm/server.go b/llm/server.go
index 5caf19875c..9100b69788 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -149,7 +149,11 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
 	var textProcessor model.TextProcessor
 	var err error
 	if envconfig.NewEngine() || f.KV().OllamaEngineRequired() {
-		textProcessor, err = model.NewTextProcessor(modelPath)
+		if len(projectors) == 0 {
+			textProcessor, err = model.NewTextProcessor(modelPath)
+		} else {
+			err = errors.New("split vision models aren't supported")
+		}
 		if err != nil {
 			// To prepare for opt-out mode, instead of treating this as an error, we fallback to the old runner
 			slog.Debug("model not yet supported by Ollama engine, switching to compatibility mode", "model", modelPath, "error", err)
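
Note: for readers skimming the diff, the control flow this patch introduces is small
enough to show in isolation. Below is a minimal, self-contained sketch of the fallback
decision; loadTextProcessor, chooseEngine, and the returned engine labels are
hypothetical stand-ins, not the real code in llm/server.go, and only the shape of the
len(projectors) check mirrors the patch.

// Sketch only: hypothetical stand-ins for the logic in NewLlamaServer.
package main

import (
	"errors"
	"fmt"
	"log/slog"
)

// loadTextProcessor stands in for model.NewTextProcessor (hypothetical).
func loadTextProcessor(modelPath string) (string, error) {
	return "ollama engine text processor", nil
}

// chooseEngine refuses the Ollama engine when a split vision projector is
// present, since the projector would otherwise be silently ignored; the
// error triggers the existing fallback to the llama engine.
func chooseEngine(modelPath string, projectors []string) string {
	var textProcessor string
	var err error
	if len(projectors) == 0 {
		textProcessor, err = loadTextProcessor(modelPath)
	} else {
		err = errors.New("split vision models aren't supported")
	}
	if err != nil {
		// Mirrors the patched code: fall back rather than fail outright.
		slog.Debug("model not yet supported by Ollama engine, switching to compatibility mode",
			"model", modelPath, "error", err)
		return "llama engine (compatibility mode)"
	}
	return textProcessor
}

func main() {
	fmt.Println(chooseEngine("model.gguf", nil))                     // Ollama engine
	fmt.Println(chooseEngine("model.gguf", []string{"mmproj.gguf"})) // fallback
}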