From 46e485f32cda551c2b39876bcf05eee2d1a559fb Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Tue, 2 Sep 2025 10:42:16 -0700 Subject: [PATCH] runner: disable embedding models in ollama engine --- fs/ggml/ggml.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go index 3f4374cd00..2eb1dbddeb 100644 --- a/fs/ggml/ggml.go +++ b/fs/ggml/ggml.go @@ -175,6 +175,10 @@ func (kv KV) Bools(key string, defaultValue ...[]bool) []bool { } func (kv KV) OllamaEngineRequired() bool { + if kv.Uint("pooling_type") > 0 { + return false + } + return slices.Contains([]string{ "gemma3", "gemma3n", @@ -769,8 +773,7 @@ func (f GGML) SupportsKVCacheType(cacheType string) bool { // SupportsFlashAttention checks if the model supports flash attention func (f GGML) SupportsFlashAttention() bool { - _, isEmbedding := f.KV()[fmt.Sprintf("%s.pooling_type", f.KV().Architecture())] - if isEmbedding { + if f.KV().Uint("pooling_type") > 0 { return false }