ggml: Use ordinal IDs for AMD GPUs on Linux when UUID is unavailable

Some AMD GPUs do not provide a UUID and report only "XX" instead. In these cases, we should fall back to the ordinal ID as an alternate identifier, which is what we already always have to do for AMD GPUs on Windows. In addition, this prints the ID of each GPU while enumerating devices, to make future debugging easier.
2025-08-25 08:51:36 +02:00 · 2025-08-11 17:01:07 -07:00
parent d0cf6c8281
commit a343ae53a4
2 changed files with 148 additions and 80 deletions
--- a/llama/patches/0017-ggml-Export-GPU-UUIDs.patch
+++ b/llama/patches/0017-ggml-Export-GPU-UUIDs.patch
@@ -7,12 +7,12 @@ This enables matching up devices and information reported by the backend
 with tools (e.g. nvidia-smi) and system management libraries (e.g. nvml).
 ---
 ggml/include/ggml-backend.h      |  1 +
- ggml/src/ggml-cuda/ggml-cuda.cu  | 39 ++++++++++++++++++++++++++++++++
+ ggml/src/ggml-cuda/ggml-cuda.cu  | 67 +++++++++++++++++++++++++++++---
 ggml/src/ggml-metal/ggml-metal.m |  1 +
- 3 files changed, 41 insertions(+)
+ 3 files changed, 63 insertions(+), 6 deletions(-)

 diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
-index 74e46716..48839339 100644
+index 74e467163..48839339d 100644
 --- a/ggml/include/ggml-backend.h
 +++ b/ggml/include/ggml-backend.h
@@ -152,6 +152,7 @@ extern "C" {
@@ -24,10 +24,93 @@ index 74e46716..48839339 100644
         size_t memory_total;
         enum ggml_backend_dev_type type;
 diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
-index cb0d8528..d6960174 100644
+index cb0d8528d..1492368de 100644
 --- a/ggml/src/ggml-cuda/ggml-cuda.cu
 +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
-@@ -2884,6 +2884,7 @@ struct ggml_backend_cuda_device_context {
+@@ -173,6 +173,51 @@ static int ggml_cuda_parse_id(char devName[]) {
+ }
+ #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
+ 
+static std::string ggml_cuda_parse_uuid(cudaDeviceProp prop, int device_num) {
+    char id[64];
+
+    #if !defined(GGML_USE_HIP)
+    snprintf(id, sizeof(id),
+        "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+        (unsigned char)prop.uuid.bytes[0],
+        (unsigned char)prop.uuid.bytes[1],
+        (unsigned char)prop.uuid.bytes[2],
+        (unsigned char)prop.uuid.bytes[3],
+        (unsigned char)prop.uuid.bytes[4],
+        (unsigned char)prop.uuid.bytes[5],
+        (unsigned char)prop.uuid.bytes[6],
+        (unsigned char)prop.uuid.bytes[7],
+        (unsigned char)prop.uuid.bytes[8],
+        (unsigned char)prop.uuid.bytes[9],
+        (unsigned char)prop.uuid.bytes[10],
+        (unsigned char)prop.uuid.bytes[11],
+        (unsigned char)prop.uuid.bytes[12],
+        (unsigned char)prop.uuid.bytes[13],
+        (unsigned char)prop.uuid.bytes[14],
+        (unsigned char)prop.uuid.bytes[15]
+        );
+    #else
+    #ifdef _WIN32
+        snprintf(id, sizeof(id), "%d", device_num);
+    #else
+    try {
+        std::string uuid = std::string(prop.uuid.bytes, 16);
+
+        size_t pos = 0;
+        unsigned long long v = stoull(uuid, &pos, 16);
+        if (v == 0 || pos != uuid.size() || (!uuid.empty() && uuid[0] == '-'))
+            throw std::invalid_argument("invalid uuid");
+
+        snprintf(id, sizeof(id), "GPU-%016llx", v);
+    } catch (const std::exception &e) {
+        snprintf(id, sizeof(id), "%d", device_num);
+    }
+    #endif
+    #endif
+
+    return id;
+}
+
+ static ggml_cuda_device_info ggml_cuda_init() {
+ #ifdef __HIP_PLATFORM_AMD__
+     // Workaround for a rocBLAS bug when using multiple graphics cards:
+@@ -261,22 +306,24 @@ static ggml_cuda_device_info ggml_cuda_init() {
+                 info.devices[id].cc += prop.minor * 0x10;
+             }
+         }
+-        GGML_LOG_INFO("  Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d\n",
+        GGML_LOG_INFO("  Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d, ID: %s\n",
+                       id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff,
+-                      device_vmm ? "yes" : "no", prop.warpSize);
+                      device_vmm ? "yes" : "no", prop.warpSize, ggml_cuda_parse_uuid(prop, id).c_str());
+ #elif defined(GGML_USE_MUSA)
+         // FIXME: Ensure compatibility with varying warp sizes across different MUSA archs.
+         info.devices[id].warp_size = 32;
+         info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
+         info.devices[id].cc = GGML_CUDA_CC_OFFSET_MTHREADS + prop.major * 0x100;
+         info.devices[id].cc += prop.minor * 0x10;
+-        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s\n",
+-                        id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
+        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
+                        id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
+                        ggml_cuda_parse_uuid(prop, id).c_str());
+ #else
+         info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
+         info.devices[id].cc = 100*prop.major + 10*prop.minor;
+-        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s\n",
+-                        id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
+        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
+                        id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
+                        ggml_cuda_parse_uuid(prop, id).c_str());
+ #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
+     }
+ 
+@@ -2884,6 +2931,7 @@ struct ggml_backend_cuda_device_context {
     int device;
     std::string name;
     std::string description;
@@ -35,7 +118,7 @@ index cb0d8528..d6960174 100644
 };
 
 static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
-@@ -2896,6 +2897,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
+@@ -2896,6 +2944,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
     return ctx->description.c_str();
 }
 
@@ -47,7 +130,7 @@ index cb0d8528..d6960174 100644
 static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
     ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
     ggml_cuda_set_device(ctx->device);
-@@ -2910,6 +2916,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
+@@ -2910,6 +2963,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
 static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
     props->name        = ggml_backend_cuda_device_get_name(dev);
     props->description = ggml_backend_cuda_device_get_description(dev);
@@ -55,47 +138,16 @@ index cb0d8528..d6960174 100644
     props->type        = ggml_backend_cuda_device_get_type(dev);
     ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);
 
-@@ -3458,6 +3465,38 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
+@@ -3457,6 +3511,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
+                 cudaDeviceProp prop;
                 CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
                 dev_ctx->description = prop.name;
+                dev_ctx->id = ggml_cuda_parse_uuid(prop, i);
 
-+                #if !defined(GGML_USE_HIP)
-+                char id[64];
-+                snprintf(id, sizeof(id),
-+                    "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
-+                    (unsigned char)prop.uuid.bytes[0],
-+                    (unsigned char)prop.uuid.bytes[1],
-+                    (unsigned char)prop.uuid.bytes[2],
-+                    (unsigned char)prop.uuid.bytes[3],
-+                    (unsigned char)prop.uuid.bytes[4],
-+                    (unsigned char)prop.uuid.bytes[5],
-+                    (unsigned char)prop.uuid.bytes[6],
-+                    (unsigned char)prop.uuid.bytes[7],
-+                    (unsigned char)prop.uuid.bytes[8],
-+                    (unsigned char)prop.uuid.bytes[9],
-+                    (unsigned char)prop.uuid.bytes[10],
-+                    (unsigned char)prop.uuid.bytes[11],
-+                    (unsigned char)prop.uuid.bytes[12],
-+                    (unsigned char)prop.uuid.bytes[13],
-+                    (unsigned char)prop.uuid.bytes[14],
-+                    (unsigned char)prop.uuid.bytes[15]
-+                  );
-+                dev_ctx->id = id;
-+                #else
-+                #ifdef _WIN32
-+                char id[16];
-+                snprintf(id, sizeof(id), "%d", i);
-+                dev_ctx->id = id;
-+                #else
-+                dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16);
-+                #endif
-+                #endif
-+
                 ggml_backend_dev_t dev = new ggml_backend_device {
                     /* .iface   = */ ggml_backend_cuda_device_interface,
-                     /* .reg     = */ &reg,
 diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
-index 1b56f858..a9eeebc6 100644
+index 1b56f858c..a9eeebc6a 100644
 --- a/ggml/src/ggml-metal/ggml-metal.m
 +++ b/ggml/src/ggml-metal/ggml-metal.m
@@ -5703,6 +5703,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
--- a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -175,6 +175,51 @@ static int ggml_cuda_parse_id(char devName[]) {
 }
 #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)

+static std::string ggml_cuda_parse_uuid(cudaDeviceProp prop, int device_num) {
+    char id[64];
+
+    #if !defined(GGML_USE_HIP)
+    snprintf(id, sizeof(id),
+        "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+        (unsigned char)prop.uuid.bytes[0],
+        (unsigned char)prop.uuid.bytes[1],
+        (unsigned char)prop.uuid.bytes[2],
+        (unsigned char)prop.uuid.bytes[3],
+        (unsigned char)prop.uuid.bytes[4],
+        (unsigned char)prop.uuid.bytes[5],
+        (unsigned char)prop.uuid.bytes[6],
+        (unsigned char)prop.uuid.bytes[7],
+        (unsigned char)prop.uuid.bytes[8],
+        (unsigned char)prop.uuid.bytes[9],
+        (unsigned char)prop.uuid.bytes[10],
+        (unsigned char)prop.uuid.bytes[11],
+        (unsigned char)prop.uuid.bytes[12],
+        (unsigned char)prop.uuid.bytes[13],
+        (unsigned char)prop.uuid.bytes[14],
+        (unsigned char)prop.uuid.bytes[15]
+        );
+    #else
+    #ifdef _WIN32
+        snprintf(id, sizeof(id), "%d", device_num);
+    #else
+    try {
+        std::string uuid = std::string(prop.uuid.bytes, 16);
+
+        size_t pos = 0;
+        unsigned long long v = stoull(uuid, &pos, 16);
+        if (v == 0 || pos != uuid.size() || (!uuid.empty() && uuid[0] == '-'))
+            throw std::invalid_argument("invalid uuid");
+
+        snprintf(id, sizeof(id), "GPU-%016llx", v);
+    } catch (const std::exception &e) {
+        snprintf(id, sizeof(id), "%d", device_num);
+    }
+    #endif
+    #endif
+
+    return id;
+}
+
 static ggml_cuda_device_info ggml_cuda_init() {
 #ifdef __HIP_PLATFORM_AMD__
    // Workaround for a rocBLAS bug when using multiple graphics cards:
@@ -263,22 +308,24 @@ static ggml_cuda_device_info ggml_cuda_init() {
                info.devices[id].cc += prop.minor * 0x10;
            }
        }
-        GGML_LOG_INFO("  Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d\n",
+        GGML_LOG_INFO("  Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d, ID: %s\n",
                      id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff,
-                      device_vmm ? "yes" : "no", prop.warpSize);
+                      device_vmm ? "yes" : "no", prop.warpSize, ggml_cuda_parse_uuid(prop, id).c_str());
 #elif defined(GGML_USE_MUSA)
        // FIXME: Ensure compatibility with varying warp sizes across different MUSA archs.
        info.devices[id].warp_size = 32;
        info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
        info.devices[id].cc = GGML_CUDA_CC_OFFSET_MTHREADS + prop.major * 0x100;
        info.devices[id].cc += prop.minor * 0x10;
-        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s\n",
-                        id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
+        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
+                        id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
+                        ggml_cuda_parse_uuid(prop, id).c_str());
 #else
        info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
        info.devices[id].cc = 100*prop.major + 10*prop.minor;
-        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s\n",
-                        id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
+        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
+                        id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
+                        ggml_cuda_parse_uuid(prop, id).c_str());
 #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
    }

@@ -3475,38 +3522,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
                cudaDeviceProp prop;
                CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
                dev_ctx->description = prop.name;
-
-                #if !defined(GGML_USE_HIP)
-                char id[64];
-                snprintf(id, sizeof(id),
-                    "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
-                    (unsigned char)prop.uuid.bytes[0],
-                    (unsigned char)prop.uuid.bytes[1],
-                    (unsigned char)prop.uuid.bytes[2],
-                    (unsigned char)prop.uuid.bytes[3],
-                    (unsigned char)prop.uuid.bytes[4],
-                    (unsigned char)prop.uuid.bytes[5],
-                    (unsigned char)prop.uuid.bytes[6],
-                    (unsigned char)prop.uuid.bytes[7],
-                    (unsigned char)prop.uuid.bytes[8],
-                    (unsigned char)prop.uuid.bytes[9],
-                    (unsigned char)prop.uuid.bytes[10],
-                    (unsigned char)prop.uuid.bytes[11],
-                    (unsigned char)prop.uuid.bytes[12],
-                    (unsigned char)prop.uuid.bytes[13],
-                    (unsigned char)prop.uuid.bytes[14],
-                    (unsigned char)prop.uuid.bytes[15]
-                  );
-                dev_ctx->id = id;
-                #else
-                #ifdef _WIN32
-                char id[16];
-                snprintf(id, sizeof(id), "%d", i);
-                dev_ctx->id = id;
-                #else
-                dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16);
-                #endif
-                #endif
+                dev_ctx->id = ggml_cuda_parse_uuid(prop, i);

                ggml_backend_dev_t dev = new ggml_backend_device {
                    /* .iface   = */ ggml_backend_cuda_device_interface,