ggml: Report ordinal IDs for AMD GPUs on Windows

We don't get valid UUIDs for AMD GPUs on Windows, so the best option is to use the ordinal IDs. This brings us in line with what we currently do on the Ollama server - the only exception is AMD GPUs on Linux, where the server falls back to using ordinal IDs. The GGML implementation has no such fallback, but the case that would need it doesn't appear to occur for any of the GPUs that we support. It's also possible for ordinal IDs from different libraries to collide - however, the only places where we use them are AMD on Windows and Metal on Mac, and those two can never occur on the same system.
2025-09-01 07:51:35 +02:00 · 2025-06-25 17:13:32 -07:00
parent 66fb8575ce
commit 35fda7b4af
6 changed files with 45 additions and 33 deletions
--- a/llama/patches/0017-ggml-Export-GPU-UUIDs.patch
+++ b/llama/patches/0017-ggml-Export-GPU-UUIDs.patch
@@ -7,31 +7,31 @@ This enables matching up devices and information reported by the backend
 with tools (e.g. nvidia-smi) and system management libraries (e.g. nvml).
 ---
 ggml/include/ggml-backend.h      |  1 +
- ggml/src/ggml-cuda/ggml-cuda.cu  | 33 ++++++++++++++++++++++++++++++++
+ ggml/src/ggml-cuda/ggml-cuda.cu  | 39 ++++++++++++++++++++++++++++++++
 ggml/src/ggml-metal/ggml-metal.m |  1 +
- 3 files changed, 35 insertions(+)
+ 3 files changed, 41 insertions(+)

 diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
-index 74e46716..a880df33 100644
+index 74e46716..48839339 100644
 --- a/ggml/include/ggml-backend.h
 +++ b/ggml/include/ggml-backend.h
@@ -152,6 +152,7 @@ extern "C" {
     struct ggml_backend_dev_props {
         const char * name;
         const char * description;
-+        const char * uuid;
+        const char * id;
         size_t memory_free;
         size_t memory_total;
         enum ggml_backend_dev_type type;
 diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
-index cb0d8528..4c829153 100644
+index cb0d8528..d6960174 100644
 --- a/ggml/src/ggml-cuda/ggml-cuda.cu
 +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -2884,6 +2884,7 @@ struct ggml_backend_cuda_device_context {
     int device;
     std::string name;
     std::string description;
-+    std::string uuid;
+    std::string id;
 };
 
 static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
@@ -39,9 +39,9 @@ index cb0d8528..4c829153 100644
     return ctx->description.c_str();
 }
 
-+static const char * ggml_backend_cuda_device_get_uuid(ggml_backend_dev_t dev) {
+static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) {
 +    ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
-+    return ctx->uuid.c_str();
+    return ctx->id.c_str();
 +}
 +
 static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
@@ -51,17 +51,17 @@ index cb0d8528..4c829153 100644
 static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
     props->name        = ggml_backend_cuda_device_get_name(dev);
     props->description = ggml_backend_cuda_device_get_description(dev);
-+    props->uuid        = ggml_backend_cuda_device_get_uuid(dev);
+    props->id          = ggml_backend_cuda_device_get_id(dev);
     props->type        = ggml_backend_cuda_device_get_type(dev);
     ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);
 
-@@ -3458,6 +3465,32 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
+@@ -3458,6 +3465,38 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
                 CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
                 dev_ctx->description = prop.name;
 
 +                #if !defined(GGML_USE_HIP)
-+                char uuid[64];
-+                snprintf(uuid, sizeof(uuid),
+                char id[64];
+                snprintf(id, sizeof(id),
 +                    "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
 +                    (unsigned char)prop.uuid.bytes[0],
 +                    (unsigned char)prop.uuid.bytes[1],
@@ -80,23 +80,29 @@ index cb0d8528..4c829153 100644
 +                    (unsigned char)prop.uuid.bytes[14],
 +                    (unsigned char)prop.uuid.bytes[15]
 +                  );
-+                dev_ctx->uuid = uuid;
+                dev_ctx->id = id;
 +                #else
-+                dev_ctx->uuid = "GPU-" + std::string(prop.uuid.bytes, 16);
+                #ifdef _WIN32
+                char id[16];
+                snprintf(id, sizeof(id), "%d", i);
+                dev_ctx->id = id;
+                #else
+                dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16);
+                #endif
 +                #endif
 +
                 ggml_backend_dev_t dev = new ggml_backend_device {
                     /* .iface   = */ ggml_backend_cuda_device_interface,
                     /* .reg     = */ &reg,
 diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
-index 1b56f858..ee4f2dcb 100644
+index 1b56f858..a9eeebc6 100644
 --- a/ggml/src/ggml-metal/ggml-metal.m
 +++ b/ggml/src/ggml-metal/ggml-metal.m
@@ -5703,6 +5703,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
 static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
     props->name        = ggml_backend_metal_device_get_name(dev);
     props->description = ggml_backend_metal_device_get_description(dev);
-+    props->uuid        = "0";
+    props->id          = "0";
     props->type        = ggml_backend_metal_device_get_type(dev);
     ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total);
     props->caps = (struct ggml_backend_dev_caps) {
--- a/ml/backend.go
+++ b/ml/backend.go
@@ -124,9 +124,9 @@ type DeviceMemory struct {
 	// may not be persistent across instances of the runner.
 	Name string

-	// UUID is a unique persistent identifier for the device for matching
-	// with system management libraries
-	UUID string
+	// ID is an identifier for the device for matching with system
+	// management libraries.
+	ID string

 	// Weights is the per-layer memory needed for the model weights.
 	Weights []Memory
@@ -156,8 +156,8 @@ func (m DeviceMemory) LogValue() slog.Value {
 		attrs = append(attrs, slog.Any("Graph", m.Graph))
 	}

-	if len(attrs) > 0 && m.UUID != "" {
-		attrs = append([]slog.Attr{slog.String("UUID", m.UUID)}, attrs...)
+	if len(attrs) > 0 && m.ID != "" {
+		attrs = append([]slog.Attr{slog.String("ID", m.ID)}, attrs...)
 	}

 	return slog.GroupValue(attrs...)
--- a/ml/backend/ggml/ggml.go
+++ b/ml/backend/ggml/ggml.go
@@ -138,7 +138,7 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
 	requiredMemory.CPU.Name = C.GoString(C.ggml_backend_dev_name(cpuDeviceBufferType.d))
 	var props C.struct_ggml_backend_dev_props
 	C.ggml_backend_dev_get_props(cpuDeviceBufferType.d, &props)
-	requiredMemory.CPU.UUID = C.GoString(props.uuid)
+	requiredMemory.CPU.ID = C.GoString(props.id)
 	requiredMemory.CPU.Weights = make([]ml.Memory, blocks+1)
 	requiredMemory.CPU.Cache = make([]ml.Memory, blocks+1)

@@ -155,7 +155,7 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
 		requiredMemory.GPUs[i].Name = C.GoString(C.ggml_backend_dev_name(d))
 		var props C.struct_ggml_backend_dev_props
 		C.ggml_backend_dev_get_props(d, &props)
-		requiredMemory.GPUs[i].UUID = C.GoString(props.uuid)
+		requiredMemory.GPUs[i].ID = C.GoString(props.id)
 		requiredMemory.GPUs[i].Weights = make([]ml.Memory, blocks+1)
 		requiredMemory.GPUs[i].Cache = make([]ml.Memory, blocks+1)
 	}
--- a/ml/backend/ggml/ggml/include/ggml-backend.h
+++ b/ml/backend/ggml/ggml/include/ggml-backend.h
@@ -152,7 +152,7 @@ extern "C" {
    struct ggml_backend_dev_props {
        const char * name;
        const char * description;
-        const char * uuid;
+        const char * id;
        size_t memory_free;
        size_t memory_total;
        enum ggml_backend_dev_type type;
--- a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -2888,7 +2888,7 @@ struct ggml_backend_cuda_device_context {
    int device;
    std::string name;
    std::string description;
-    std::string uuid;
+    std::string id;
 };

 static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
@@ -2901,9 +2901,9 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
    return ctx->description.c_str();
 }

-static const char * ggml_backend_cuda_device_get_uuid(ggml_backend_dev_t dev) {
+static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) {
    ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
-    return ctx->uuid.c_str();
+    return ctx->id.c_str();
 }

 static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
@@ -2920,7 +2920,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
 static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
    props->name        = ggml_backend_cuda_device_get_name(dev);
    props->description = ggml_backend_cuda_device_get_description(dev);
-    props->uuid        = ggml_backend_cuda_device_get_uuid(dev);
+    props->id          = ggml_backend_cuda_device_get_id(dev);
    props->type        = ggml_backend_cuda_device_get_type(dev);
    ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);

@@ -3471,8 +3471,8 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
                dev_ctx->description = prop.name;

                #if !defined(GGML_USE_HIP)
-                char uuid[64];
-                snprintf(uuid, sizeof(uuid),
+                char id[64];
+                snprintf(id, sizeof(id),
                    "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
                    (unsigned char)prop.uuid.bytes[0],
                    (unsigned char)prop.uuid.bytes[1],
@@ -3491,9 +3491,15 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
                    (unsigned char)prop.uuid.bytes[14],
                    (unsigned char)prop.uuid.bytes[15]
                  );
-                dev_ctx->uuid = uuid;
+                dev_ctx->id = id;
                #else
-                dev_ctx->uuid = "GPU-" + std::string(prop.uuid.bytes, 16);
+                #ifdef _WIN32
+                char id[16];
+                snprintf(id, sizeof(id), "%d", i);
+                dev_ctx->id = id;
+                #else
+                dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16);
+                #endif
                #endif

                ggml_backend_dev_t dev = new ggml_backend_device {
--- a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m
+++ b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m
@@ -5726,7 +5726,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
 static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
    props->name        = ggml_backend_metal_device_get_name(dev);
    props->description = ggml_backend_metal_device_get_description(dev);
-    props->uuid        = "0";
+    props->id          = "0";
    props->type        = ggml_backend_metal_device_get_type(dev);
    ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total);
    props->caps = (struct ggml_backend_dev_caps) {