runner: disable embedding models in ollama engine

This commit is contained in:
Michael Yang
2025-09-02 10:42:16 -07:00
parent 517807cdf2
commit 46e485f32c

View File

@@ -175,6 +175,10 @@ func (kv KV) Bools(key string, defaultValue ...[]bool) []bool {
} }
func (kv KV) OllamaEngineRequired() bool { func (kv KV) OllamaEngineRequired() bool {
if kv.Uint("pooling_type") > 0 {
return false
}
return slices.Contains([]string{ return slices.Contains([]string{
"gemma3", "gemma3",
"gemma3n", "gemma3n",
@@ -769,8 +773,7 @@ func (f GGML) SupportsKVCacheType(cacheType string) bool {
// SupportsFlashAttention checks if the model supports flash attention // SupportsFlashAttention checks if the model supports flash attention
func (f GGML) SupportsFlashAttention() bool { func (f GGML) SupportsFlashAttention() bool {
_, isEmbedding := f.KV()[fmt.Sprintf("%s.pooling_type", f.KV().Architecture())] if f.KV().Uint("pooling_type") > 0 {
if isEmbedding {
return false return false
} }