llm: Enable flash attention by default for gemma3

2025-11-12 10:57:51 +01:00 · 2025-10-15 10:22:03 -07:00
parent 0d713051a2
commit c3c85aa06c
1 changed file with 1 addition and 0 deletions
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -893,6 +893,7 @@ func (f GGML) SupportsFlashAttention() bool {
 // FlashAttention checks if the model should enable flash attention
 func (f GGML) FlashAttention() bool {
 	return slices.Contains([]string{
 		"gemma3",
 		"gptoss", "gpt-oss",
 		"qwen3",
 		"qwen3moe",