ggml: Disable unused pipeline parallelism

Pipeline parallelism is not currently used, even in cases where it could be. Disabling it improves generation performance by 10-30% with multiple GPUs.
2025-08-25 09:51:25 +02:00 · 2025-07-10 16:55:34 -07:00
parent f8a6e88819
commit 9a43994c45
1 changed file with 1 addition and 1 deletion
--- a/ml/backend/ggml/ggml.go
+++ b/ml/backend/ggml/ggml.go
@@ -418,7 +418,7 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
 			(*C.ggml_backend_buffer_type_t)(unsafe.Pointer(&schedBufts[0])),
 			C.int(len(schedBackends)),
 			C.size_t(maxGraphNodes),
-			C._Bool(len(gpus) > 1 && slices.Contains(gpus, output.d)),
+			C._Bool(false),
 			C._Bool(false),
 		),
 		schedBackends: schedBackends,