From c3c85aa06c1b7dcf8d7d811a4ebdf53d407faceb Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Wed, 15 Oct 2025 10:22:03 -0700 Subject: [PATCH] llm: Enable flash attention by default for gemma3 --- fs/ggml/ggml.go | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go index 920eff74a2..fcb3d9fdb4 100644 --- a/fs/ggml/ggml.go +++ b/fs/ggml/ggml.go @@ -893,6 +893,7 @@ func (f GGML) SupportsFlashAttention() bool { // FlashAttention checks if the model should enable flash attention func (f GGML) FlashAttention() bool { return slices.Contains([]string{ + "gemma3", "gptoss", "gpt-oss", "qwen3", "qwen3moe",