From 75e75d9afea9175f8bd1f0b5fe8ad6e9efbf86ee Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Thu, 30 Oct 2025 10:51:37 -0700 Subject: [PATCH] qwen3vl: enable flash attention by default (#12862) --- fs/ggml/ggml.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go index c0ca068ab6..909104c642 100644 --- a/fs/ggml/ggml.go +++ b/fs/ggml/ggml.go @@ -895,8 +895,8 @@ func (f GGML) FlashAttention() bool { return slices.Contains([]string{ "gemma3", "gptoss", "gpt-oss", - "qwen3", - "qwen3moe", + "qwen3", "qwen3moe", + "qwen3vl", "qwen3vlmoe", }, f.KV().String("general.architecture")) }