llm: Enable flash attention by default for gemma3

This commit is contained in:
Jesse Gross
2025-10-15 10:22:03 -07:00
committed by Jesse Gross
parent 0d713051a2
commit c3c85aa06c

View File

@@ -893,6 +893,7 @@ func (f GGML) SupportsFlashAttention() bool {
// FlashAttention reports whether the model should enable flash attention.
func (f GGML) FlashAttention() bool {
return slices.Contains([]string{
"gemma3",
"gptoss", "gpt-oss",
"qwen3",
"qwen3moe",