From 35fda7b4af556e7eeef2b5dcb3638435382b2576 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Wed, 25 Jun 2025 17:13:32 -0700 Subject: [PATCH] ggml: Report ordinal IDs for AMD GPUs on Windows We don't get valid UUIDs for AMD GPUs on Windows, so the best option is to use the ordinal IDs. This brings us in line with what we currently do on the Ollama server - the only exception is AMD GPUs on Linux, which falls back to using ordinal IDs. The GGML implementation has no fallback but it doesn't appear to occur for any of the GPUs that we support. It's also possible that there are collisions between ordinal IDs for different libraries - however the only places where we use them are AMD on Windows and Metal on Mac, which can never occur on the same system. --- .../patches/0017-ggml-Export-GPU-UUIDs.patch | 38 +++++++++++-------- ml/backend.go | 10 ++--- ml/backend/ggml/ggml.go | 4 +- ml/backend/ggml/ggml/include/ggml-backend.h | 2 +- .../ggml/ggml/src/ggml-cuda/ggml-cuda.cu | 22 +++++++---- .../ggml/ggml/src/ggml-metal/ggml-metal.m | 2 +- 6 files changed, 45 insertions(+), 33 deletions(-) diff --git a/llama/patches/0017-ggml-Export-GPU-UUIDs.patch b/llama/patches/0017-ggml-Export-GPU-UUIDs.patch index a2539034c6..b7d56b0d88 100644 --- a/llama/patches/0017-ggml-Export-GPU-UUIDs.patch +++ b/llama/patches/0017-ggml-Export-GPU-UUIDs.patch @@ -7,31 +7,31 @@ This enables matching up devices and information reported by the backend with tools (e.g. nvidia-smi) and system management libraries (e.g. nvml). 
--- ggml/include/ggml-backend.h | 1 + - ggml/src/ggml-cuda/ggml-cuda.cu | 33 ++++++++++++++++++++++++++++++++ + ggml/src/ggml-cuda/ggml-cuda.cu | 39 ++++++++++++++++++++++++++++++++ ggml/src/ggml-metal/ggml-metal.m | 1 + - 3 files changed, 35 insertions(+) + 3 files changed, 41 insertions(+) diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h -index 74e46716..a880df33 100644 +index 74e46716..48839339 100644 --- a/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h @@ -152,6 +152,7 @@ extern "C" { struct ggml_backend_dev_props { const char * name; const char * description; -+ const char * uuid; ++ const char * id; size_t memory_free; size_t memory_total; enum ggml_backend_dev_type type; diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu -index cb0d8528..4c829153 100644 +index cb0d8528..d6960174 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -2884,6 +2884,7 @@ struct ggml_backend_cuda_device_context { int device; std::string name; std::string description; -+ std::string uuid; ++ std::string id; }; static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) { @@ -39,9 +39,9 @@ index cb0d8528..4c829153 100644 return ctx->description.c_str(); } -+static const char * ggml_backend_cuda_device_get_uuid(ggml_backend_dev_t dev) { ++static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) { + ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context; -+ return ctx->uuid.c_str(); ++ return ctx->id.c_str(); +} + static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { @@ -51,17 +51,17 @@ index cb0d8528..4c829153 100644 static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) { props->name = ggml_backend_cuda_device_get_name(dev); props->description = ggml_backend_cuda_device_get_description(dev); -+ props->uuid = 
ggml_backend_cuda_device_get_uuid(dev); ++ props->id = ggml_backend_cuda_device_get_id(dev); props->type = ggml_backend_cuda_device_get_type(dev); ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total); -@@ -3458,6 +3465,32 @@ ggml_backend_reg_t ggml_backend_cuda_reg() { +@@ -3458,6 +3465,38 @@ ggml_backend_reg_t ggml_backend_cuda_reg() { CUDA_CHECK(cudaGetDeviceProperties(&prop, i)); dev_ctx->description = prop.name; + #if !defined(GGML_USE_HIP) -+ char uuid[64]; -+ snprintf(uuid, sizeof(uuid), ++ char id[64]; ++ snprintf(id, sizeof(id), + "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", + (unsigned char)prop.uuid.bytes[0], + (unsigned char)prop.uuid.bytes[1], @@ -80,23 +80,29 @@ index cb0d8528..4c829153 100644 + (unsigned char)prop.uuid.bytes[14], + (unsigned char)prop.uuid.bytes[15] + ); -+ dev_ctx->uuid = uuid; ++ dev_ctx->id = id; + #else -+ dev_ctx->uuid = "GPU-" + std::string(prop.uuid.bytes, 16); ++ #ifdef _WIN32 ++ char id[16]; ++ snprintf(id, sizeof(id), "%d", i); ++ dev_ctx->id = id; ++ #else ++ dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16); ++ #endif + #endif + ggml_backend_dev_t dev = new ggml_backend_device { /* .iface = */ ggml_backend_cuda_device_interface, /* .reg = */ &reg, diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m -index 1b56f858..ee4f2dcb 100644 +index 1b56f858..a9eeebc6 100644 --- a/ggml/src/ggml-metal/ggml-metal.m +++ b/ggml/src/ggml-metal/ggml-metal.m @@ -5703,6 +5703,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) { props->name = ggml_backend_metal_device_get_name(dev); props->description = ggml_backend_metal_device_get_description(dev); -+ props->uuid = "0"; ++ props->id = "0"; props->type = ggml_backend_metal_device_get_type(dev); ggml_backend_metal_device_get_memory(dev, &props->memory_free, 
&props->memory_total); props->caps = (struct ggml_backend_dev_caps) { diff --git a/ml/backend.go b/ml/backend.go index 61066c1aa0..06f9de9ae8 100644 --- a/ml/backend.go +++ b/ml/backend.go @@ -124,9 +124,9 @@ type DeviceMemory struct { // may not be persistent across instances of the runner. Name string - // UUID is a unique persistent identifier for the device for matching - // with system management libraries - UUID string + // ID is an identifier for the device for matching with system + // management libraries. + ID string // Weights is the per-layer memory needed for the model weights. Weights []Memory @@ -156,8 +156,8 @@ func (m DeviceMemory) LogValue() slog.Value { attrs = append(attrs, slog.Any("Graph", m.Graph)) } - if len(attrs) > 0 && m.UUID != "" { - attrs = append([]slog.Attr{slog.String("UUID", m.UUID)}, attrs...) + if len(attrs) > 0 && m.ID != "" { + attrs = append([]slog.Attr{slog.String("ID", m.ID)}, attrs...) } return slog.GroupValue(attrs...) diff --git a/ml/backend/ggml/ggml.go b/ml/backend/ggml/ggml.go index 4f1212de4c..680910f8dd 100644 --- a/ml/backend/ggml/ggml.go +++ b/ml/backend/ggml/ggml.go @@ -138,7 +138,7 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) { requiredMemory.CPU.Name = C.GoString(C.ggml_backend_dev_name(cpuDeviceBufferType.d)) var props C.struct_ggml_backend_dev_props C.ggml_backend_dev_get_props(cpuDeviceBufferType.d, &props) - requiredMemory.CPU.UUID = C.GoString(props.uuid) + requiredMemory.CPU.ID = C.GoString(props.id) requiredMemory.CPU.Weights = make([]ml.Memory, blocks+1) requiredMemory.CPU.Cache = make([]ml.Memory, blocks+1) @@ -155,7 +155,7 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) { requiredMemory.GPUs[i].Name = C.GoString(C.ggml_backend_dev_name(d)) var props C.struct_ggml_backend_dev_props C.ggml_backend_dev_get_props(d, &props) - requiredMemory.GPUs[i].UUID = C.GoString(props.uuid) + requiredMemory.GPUs[i].ID = C.GoString(props.id) 
requiredMemory.GPUs[i].Weights = make([]ml.Memory, blocks+1) requiredMemory.GPUs[i].Cache = make([]ml.Memory, blocks+1) } diff --git a/ml/backend/ggml/ggml/include/ggml-backend.h b/ml/backend/ggml/ggml/include/ggml-backend.h index a880df33ee..48839339d1 100644 --- a/ml/backend/ggml/ggml/include/ggml-backend.h +++ b/ml/backend/ggml/ggml/include/ggml-backend.h @@ -152,7 +152,7 @@ extern "C" { struct ggml_backend_dev_props { const char * name; const char * description; - const char * uuid; + const char * id; size_t memory_free; size_t memory_total; enum ggml_backend_dev_type type; diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu index 9e64e5ae44..2b9fabf4f5 100644 --- a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu @@ -2888,7 +2888,7 @@ struct ggml_backend_cuda_device_context { int device; std::string name; std::string description; - std::string uuid; + std::string id; }; static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) { @@ -2901,9 +2901,9 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t return ctx->description.c_str(); } -static const char * ggml_backend_cuda_device_get_uuid(ggml_backend_dev_t dev) { +static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) { ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context; - return ctx->uuid.c_str(); + return ctx->id.c_str(); } static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { @@ -2920,7 +2920,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) { props->name = ggml_backend_cuda_device_get_name(dev); props->description = ggml_backend_cuda_device_get_description(dev); - props->uuid = 
ggml_backend_cuda_device_get_uuid(dev); + props->id = ggml_backend_cuda_device_get_id(dev); props->type = ggml_backend_cuda_device_get_type(dev); ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total); @@ -3471,8 +3471,8 @@ ggml_backend_reg_t ggml_backend_cuda_reg() { dev_ctx->description = prop.name; #if !defined(GGML_USE_HIP) - char uuid[64]; - snprintf(uuid, sizeof(uuid), + char id[64]; + snprintf(id, sizeof(id), "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", (unsigned char)prop.uuid.bytes[0], (unsigned char)prop.uuid.bytes[1], @@ -3491,9 +3491,15 @@ ggml_backend_reg_t ggml_backend_cuda_reg() { (unsigned char)prop.uuid.bytes[14], (unsigned char)prop.uuid.bytes[15] ); - dev_ctx->uuid = uuid; + dev_ctx->id = id; #else - dev_ctx->uuid = "GPU-" + std::string(prop.uuid.bytes, 16); + #ifdef _WIN32 + char id[16]; + snprintf(id, sizeof(id), "%d", i); + dev_ctx->id = id; + #else + dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16); + #endif #endif ggml_backend_dev_t dev = new ggml_backend_device { diff --git a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m index f20f5615ec..110c9ece9f 100644 --- a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m +++ b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m @@ -5726,7 +5726,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) { props->name = ggml_backend_metal_device_get_name(dev); props->description = ggml_backend_metal_device_get_description(dev); - props->uuid = "0"; + props->id = "0"; props->type = ggml_backend_metal_device_get_type(dev); ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total); props->caps = (struct ggml_backend_dev_caps) {