From 8f4ec9ab289fd2a1f96384926a7f7bfd888d4ef9 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 11 Aug 2025 14:45:45 -0700 Subject: [PATCH] discover: CPU supports flash attention We already run flash attention on CPUs in cases where we have partial offloading but were disabling it if running on pure CPU, which is unnecessary. --- discover/types.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/discover/types.go b/discover/types.go index c5212d94e0..13a030fd59 100644 --- a/discover/types.go +++ b/discover/types.go @@ -171,7 +171,8 @@ func (si SystemInfo) GetOptimalThreadCount() int { // For each GPU, check if it does NOT support flash attention func (l GpuInfoList) FlashAttentionSupported() bool { for _, gpu := range l { - supportsFA := gpu.Library == "metal" || + supportsFA := gpu.Library == "cpu" || + gpu.Library == "metal" || (gpu.Library == "cuda" && gpu.DriverMajor >= 7) || gpu.Library == "rocm"