From 8f4ec9ab289fd2a1f96384926a7f7bfd888d4ef9 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@ollama.com>
Date: Mon, 11 Aug 2025 14:45:45 -0700
Subject: [PATCH] discover: CPU supports flash attention

We already run flash attention on CPUs in cases where we have
partial offloading, but we were disabling it when running purely
on the CPU, which is unnecessary.
---
 discover/types.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/discover/types.go b/discover/types.go
index c5212d94e0..13a030fd59 100644
--- a/discover/types.go
+++ b/discover/types.go
@@ -171,7 +171,8 @@ func (si SystemInfo) GetOptimalThreadCount() int {
 // For each GPU, check if it does NOT support flash attention
 func (l GpuInfoList) FlashAttentionSupported() bool {
 	for _, gpu := range l {
-		supportsFA := gpu.Library == "metal" ||
+		supportsFA := gpu.Library == "cpu" ||
+			gpu.Library == "metal" ||
 			(gpu.Library == "cuda" && gpu.DriverMajor >= 7) ||
 			gpu.Library == "rocm"