From aba157531521192a04d09811fac3cda20e1a8340 Mon Sep 17 00:00:00 2001
From: Jesse Gross
Date: Wed, 10 Sep 2025 11:03:06 -0700
Subject: [PATCH] llm: Don't try to load split vision models in the Ollama
 engine

If a model with a split vision projector is loaded in the Ollama
engine, the projector will be ignored and the model will hallucinate
a response. Instead, fall back and try to load the model in the llama
engine.
---
 llm/server.go | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/llm/server.go b/llm/server.go
index 5caf19875c..9100b69788 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -149,7 +149,11 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
 	var textProcessor model.TextProcessor
 	var err error
 	if envconfig.NewEngine() || f.KV().OllamaEngineRequired() {
-		textProcessor, err = model.NewTextProcessor(modelPath)
+		if len(projectors) == 0 {
+			textProcessor, err = model.NewTextProcessor(modelPath)
+		} else {
+			err = errors.New("split vision models aren't supported")
+		}
 		if err != nil {
 			// To prepare for opt-out mode, instead of treating this as an error, we fallback to the old runner
 			slog.Debug("model not yet supported by Ollama engine, switching to compatibility mode", "model", modelPath, "error", err)
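
Note: for readers skimming the diff, the control flow this patch introduces is small
enough to show in isolation. Below is a minimal, self-contained sketch of the fallback
decision; loadTextProcessor, chooseEngine, and the returned engine labels are
hypothetical stand-ins, not the real code in llm/server.go, and only the shape of the
len(projectors) check mirrors the patch.

// Sketch only: hypothetical stand-ins for the logic in NewLlamaServer.
package main

import (
	"errors"
	"fmt"
	"log/slog"
)

// loadTextProcessor stands in for model.NewTextProcessor (hypothetical).
func loadTextProcessor(modelPath string) (string, error) {
	return "ollama engine text processor", nil
}

// chooseEngine refuses the Ollama engine when a split vision projector is
// present, since the projector would otherwise be silently ignored; the
// error triggers the existing fallback to the llama engine.
func chooseEngine(modelPath string, projectors []string) string {
	var textProcessor string
	var err error
	if len(projectors) == 0 {
		textProcessor, err = loadTextProcessor(modelPath)
	} else {
		err = errors.New("split vision models aren't supported")
	}
	if err != nil {
		// Mirrors the patched code: fall back rather than fail outright.
		slog.Debug("model not yet supported by Ollama engine, switching to compatibility mode",
			"model", modelPath, "error", err)
		return "llama engine (compatibility mode)"
	}
	return textProcessor
}

func main() {
	fmt.Println(chooseEngine("model.gguf", nil))                     // Ollama engine
	fmt.Println(chooseEngine("model.gguf", []string{"mmproj.gguf"})) // fallback
}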