llm: Don't try to load split vision models in the Ollama engine

If a model with a split vision projector is loaded in the Ollama
engine, the projector will be ignored and the model will hallucinate
a response. Instead, fall back and try to load the model in the llama
engine.
Jesse Gross authored and committed 2025-09-10 11:03:06 -07:00
parent eb10390de9
commit aba1575315


@@ -149,7 +149,11 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
 	var textProcessor model.TextProcessor
 	var err error
 	if envconfig.NewEngine() || f.KV().OllamaEngineRequired() {
+		if len(projectors) == 0 {
 			textProcessor, err = model.NewTextProcessor(modelPath)
+		} else {
+			err = errors.New("split vision models aren't supported")
+		}
 		if err != nil {
 			// To prepare for opt-out mode, instead of treating this as an error, we fallback to the old runner
 			slog.Debug("model not yet supported by Ollama engine, switching to compatibility mode", "model", modelPath, "error", err)
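For context, here is a minimal standalone sketch of the load decision this hunk implements. The pickEngine helper, its parameters, and the "ollama"/"llama" return values are illustrative stand-ins rather than Ollama APIs; the real logic lives inside NewLlamaServer as shown above, where the error simply triggers the existing compatibility-mode fallback.

package main

import (
	"errors"
	"fmt"
)

// pickEngine is a hypothetical helper that mirrors the gate this commit adds:
// when a model ships a separate (split) vision projector, the new Ollama
// engine cannot use it, so the loader reports an error and the caller falls
// back to the llama.cpp-based runner instead of silently ignoring the
// projector and hallucinating responses.
func pickEngine(newEngineRequested bool, projectors []string) (string, error) {
	if newEngineRequested {
		if len(projectors) == 0 {
			return "ollama", nil
		}
		// Same condition the patch introduces: refuse split vision models
		// on the new engine and fall back.
		return "llama", errors.New("split vision models aren't supported")
	}
	return "llama", nil
}

func main() {
	engine, err := pickEngine(true, []string{"mmproj.gguf"})
	fmt.Println(engine, err)
}

Reporting an error instead of hard-failing keeps the behavior consistent with the existing opt-out path: unsupported models degrade to the old runner rather than refusing to load.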