recent llama.cpp update added kernels for fp32, q5_0, and q5_1

2025-03-29 11:11:47 +01:00 · 2023-11-20 13:44:12 -08:00 · 2023-11-20 13:44:12 -08:00 · 19b7a4d715
commit 19b7a4d715
parent 8c4022b06b
1 changed file with 1 addition and 8 deletions
--- a/llm/llm.go
+++ b/llm/llm.go
@@ -41,20 +41,13 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error

 	if runtime.GOOS == "darwin" {
 		switch ggml.FileType() {
-		case "Q8_0":
+		case "F32", "Q5_0", "Q5_1", "Q8_0":
 			if ggml.Name() != "gguf" && opts.NumGPU != 0 {
 				// GGML Q8_0 do not support Metal API and will
 				// cause the runner to segmentation fault so disable GPU
 				log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
 				opts.NumGPU = 0
 			}
-		case "F32", "Q5_0", "Q5_1":
-			if opts.NumGPU != 0 {
-				// F32, Q5_0, Q5_1, and Q8_0 do not support Metal API and will
-				// cause the runner to segmentation fault so disable GPU
-				log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
-				opts.NumGPU = 0
-			}
 		}

 		var requiredMemory int64