diff --git a/llama/patches/0017-ggml-Export-GPU-UUIDs.patch b/llama/patches/0017-ggml-Export-GPU-UUIDs.patch
index b7d56b0d88..2bd938a3f5 100644
--- a/llama/patches/0017-ggml-Export-GPU-UUIDs.patch
+++ b/llama/patches/0017-ggml-Export-GPU-UUIDs.patch
@@ -7,12 +7,12 @@ This enables matching up devices and information reported by the backend
 with tools (e.g. nvidia-smi) and system management libraries (e.g. nvml).
 ---
  ggml/include/ggml-backend.h      |  1 +
- ggml/src/ggml-cuda/ggml-cuda.cu  | 39 ++++++++++++++++++++++++++++++++
+ ggml/src/ggml-cuda/ggml-cuda.cu  | 67 +++++++++++++++++++++++++++++---
  ggml/src/ggml-metal/ggml-metal.m |  1 +
- 3 files changed, 41 insertions(+)
+ 3 files changed, 63 insertions(+), 6 deletions(-)
 
 diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
-index 74e46716..48839339 100644
+index 74e467163..48839339d 100644
 --- a/ggml/include/ggml-backend.h
 +++ b/ggml/include/ggml-backend.h
 @@ -152,6 +152,7 @@ extern "C" {
@@ -24,10 +24,93 @@ index 74e46716..48839339 100644
          size_t memory_total;
          enum ggml_backend_dev_type type;
 diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
-index cb0d8528..d6960174 100644
+index cb0d8528d..1492368de 100644
 --- a/ggml/src/ggml-cuda/ggml-cuda.cu
 +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
-@@ -2884,6 +2884,7 @@ struct ggml_backend_cuda_device_context {
+@@ -173,6 +173,51 @@ static int ggml_cuda_parse_id(char devName[]) {
+ }
+ #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
+ 
++// Builds an identifier string for a GPU from its device properties.
++//
++// - Non-HIP builds: the 16 bytes of prop.uuid are hex-encoded in 8-4-4-4-12
++//   layout with a "GPU-" prefix.
++// - HIP on Windows: the decimal device_num is returned.
++// - HIP elsewhere: prop.uuid.bytes is parsed as a base-16 number that must
++//   consume all 16 characters and is returned as "GPU-" plus 16 lowercase hex
++//   digits; if parsing fails or the value is zero, device_num is returned.
++// prop is taken by const reference to avoid copying the struct on every call.
++static std::string ggml_cuda_parse_uuid(const cudaDeviceProp & prop, int device_num) {
++    char id[64];
++
++    #if !defined(GGML_USE_HIP)
++    const unsigned char * b = (const unsigned char *)prop.uuid.bytes;
++    snprintf(id, sizeof(id),
++        "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
++        b[0], b[1], b[2],  b[3],  b[4],  b[5],  b[6],  b[7],
++        b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15]);
++    #else
++    #ifdef _WIN32
++    snprintf(id, sizeof(id), "%d", device_num);
++    #else
++    try {
++        // Exactly 16 characters are copied, so the string is never empty.
++        const std::string uuid(prop.uuid.bytes, 16);
++
++        // std::stoull throws on unparsable or out-of-range input; pos reports
++        // how many characters it consumed so a partial parse can be rejected.
++        size_t pos = 0;
++        const unsigned long long v = std::stoull(uuid, &pos, 16);
++        if (v == 0 || pos != uuid.size() || uuid[0] == '-') {
++            throw std::invalid_argument("invalid uuid");
++        }
++
++        snprintf(id, sizeof(id), "GPU-%016llx", v);
++    } catch (const std::exception &) {
++        // No usable UUID: fall back to the decimal device_num.
++        snprintf(id, sizeof(id), "%d", device_num);
++    }
++    #endif
++    #endif
++
++    return id;
++}
++
+ static ggml_cuda_device_info ggml_cuda_init() {
+ #ifdef __HIP_PLATFORM_AMD__
+     // Workaround for a rocBLAS bug when using multiple graphics cards:
+@@ -261,22 +306,24 @@ static ggml_cuda_device_info ggml_cuda_init() {
+                 info.devices[id].cc += prop.minor * 0x10;
+             }
+         }
+-        GGML_LOG_INFO("  Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d\n",
++        GGML_LOG_INFO("  Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d, ID: %s\n",
+                       id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff,
+-                      device_vmm ? "yes" : "no", prop.warpSize);
++                      device_vmm ? "yes" : "no", prop.warpSize, ggml_cuda_parse_uuid(prop, id).c_str());
+ #elif defined(GGML_USE_MUSA)
+         // FIXME: Ensure compatibility with varying warp sizes across different MUSA archs.
+         info.devices[id].warp_size = 32;
+         info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
+         info.devices[id].cc = GGML_CUDA_CC_OFFSET_MTHREADS + prop.major * 0x100;
+         info.devices[id].cc += prop.minor * 0x10;
+-        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s\n",
+-                        id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
++        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
++                        id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
++                        ggml_cuda_parse_uuid(prop, id).c_str());
+ #else
+         info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
+         info.devices[id].cc = 100*prop.major + 10*prop.minor;
+-        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s\n",
+-                        id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
++        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
++                        id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
++                        ggml_cuda_parse_uuid(prop, id).c_str());
+ #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
+     }
+ 
+@@ -2884,6 +2931,7 @@ struct ggml_backend_cuda_device_context {
      int device;
      std::string name;
      std::string description;
@@ -35,7 +118,7 @@ index cb0d8528..d6960174 100644
  };
  
  static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
-@@ -2896,6 +2897,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
+@@ -2896,6 +2944,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
      return ctx->description.c_str();
  }
  
@@ -47,7 +130,7 @@ index cb0d8528..d6960174 100644
  static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
      ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
      ggml_cuda_set_device(ctx->device);
-@@ -2910,6 +2916,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
+@@ -2910,6 +2963,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
  static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
      props->name        = ggml_backend_cuda_device_get_name(dev);
      props->description = ggml_backend_cuda_device_get_description(dev);
@@ -55,47 +138,16 @@ index cb0d8528..d6960174 100644
      props->type        = ggml_backend_cuda_device_get_type(dev);
      ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);
  
-@@ -3458,6 +3465,38 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
+@@ -3457,6 +3511,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
+                 cudaDeviceProp prop;
                  CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
                  dev_ctx->description = prop.name;
++                dev_ctx->id = ggml_cuda_parse_uuid(prop, i);
  
-+                #if !defined(GGML_USE_HIP)
-+                char id[64];
-+                snprintf(id, sizeof(id),
-+                    "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
-+                    (unsigned char)prop.uuid.bytes[0],
-+                    (unsigned char)prop.uuid.bytes[1],
-+                    (unsigned char)prop.uuid.bytes[2],
-+                    (unsigned char)prop.uuid.bytes[3],
-+                    (unsigned char)prop.uuid.bytes[4],
-+                    (unsigned char)prop.uuid.bytes[5],
-+                    (unsigned char)prop.uuid.bytes[6],
-+                    (unsigned char)prop.uuid.bytes[7],
-+                    (unsigned char)prop.uuid.bytes[8],
-+                    (unsigned char)prop.uuid.bytes[9],
-+                    (unsigned char)prop.uuid.bytes[10],
-+                    (unsigned char)prop.uuid.bytes[11],
-+                    (unsigned char)prop.uuid.bytes[12],
-+                    (unsigned char)prop.uuid.bytes[13],
-+                    (unsigned char)prop.uuid.bytes[14],
-+                    (unsigned char)prop.uuid.bytes[15]
-+                  );
-+                dev_ctx->id = id;
-+                #else
-+                #ifdef _WIN32
-+                char id[16];
-+                snprintf(id, sizeof(id), "%d", i);
-+                dev_ctx->id = id;
-+                #else
-+                dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16);
-+                #endif
-+                #endif
-+
                  ggml_backend_dev_t dev = new ggml_backend_device {
                      /* .iface   = */ ggml_backend_cuda_device_interface,
-                     /* .reg     = */ &reg,
 diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
-index 1b56f858..a9eeebc6 100644
+index 1b56f858c..a9eeebc6a 100644
 --- a/ggml/src/ggml-metal/ggml-metal.m
 +++ b/ggml/src/ggml-metal/ggml-metal.m
 @@ -5703,6 +5703,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
index 080e7467b0..496973adf2 100644
--- a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -175,6 +175,51 @@ static int ggml_cuda_parse_id(char devName[]) {
 }
 #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
 
+// Builds an identifier string for a GPU from its device properties.
+//
+// - Non-HIP builds: the 16 bytes of prop.uuid are hex-encoded in 8-4-4-4-12
+//   layout with a "GPU-" prefix.
+// - HIP on Windows: the decimal device_num is returned.
+// - HIP elsewhere: prop.uuid.bytes is parsed as a base-16 number that must
+//   consume all 16 characters and is returned as "GPU-" plus 16 lowercase hex
+//   digits; if parsing fails or the value is zero, device_num is returned.
+// prop is taken by const reference to avoid copying the struct on every call.
+static std::string ggml_cuda_parse_uuid(const cudaDeviceProp & prop, int device_num) {
+    char id[64];
+
+    #if !defined(GGML_USE_HIP)
+    const unsigned char * b = (const unsigned char *)prop.uuid.bytes;
+    snprintf(id, sizeof(id),
+        "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+        b[0], b[1], b[2],  b[3],  b[4],  b[5],  b[6],  b[7],
+        b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15]);
+    #else
+    #ifdef _WIN32
+    snprintf(id, sizeof(id), "%d", device_num);
+    #else
+    try {
+        // Exactly 16 characters are copied, so the string is never empty.
+        const std::string uuid(prop.uuid.bytes, 16);
+
+        // std::stoull throws on unparsable or out-of-range input; pos reports
+        // how many characters it consumed so a partial parse can be rejected.
+        size_t pos = 0;
+        const unsigned long long v = std::stoull(uuid, &pos, 16);
+        if (v == 0 || pos != uuid.size() || uuid[0] == '-') {
+            throw std::invalid_argument("invalid uuid");
+        }
+
+        snprintf(id, sizeof(id), "GPU-%016llx", v);
+    } catch (const std::exception &) {
+        // No usable UUID: fall back to the decimal device_num.
+        snprintf(id, sizeof(id), "%d", device_num);
+    }
+    #endif
+    #endif
+
+    return id;
+}
+
 static ggml_cuda_device_info ggml_cuda_init() {
 #ifdef __HIP_PLATFORM_AMD__
     // Workaround for a rocBLAS bug when using multiple graphics cards:
@@ -263,22 +308,24 @@ static ggml_cuda_device_info ggml_cuda_init() {
                 info.devices[id].cc += prop.minor * 0x10;
             }
         }
-        GGML_LOG_INFO("  Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d\n",
+        GGML_LOG_INFO("  Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d, ID: %s\n",
                       id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff,
-                      device_vmm ? "yes" : "no", prop.warpSize);
+                      device_vmm ? "yes" : "no", prop.warpSize, ggml_cuda_parse_uuid(prop, id).c_str());
 #elif defined(GGML_USE_MUSA)
         // FIXME: Ensure compatibility with varying warp sizes across different MUSA archs.
         info.devices[id].warp_size = 32;
         info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
         info.devices[id].cc = GGML_CUDA_CC_OFFSET_MTHREADS + prop.major * 0x100;
         info.devices[id].cc += prop.minor * 0x10;
-        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s\n",
-                        id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
+        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
+                        id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
+                        ggml_cuda_parse_uuid(prop, id).c_str());
 #else
         info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
         info.devices[id].cc = 100*prop.major + 10*prop.minor;
-        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s\n",
-                        id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
+        GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
+                        id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
+                        ggml_cuda_parse_uuid(prop, id).c_str());
 #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
     }
 
@@ -3475,38 +3522,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
                 cudaDeviceProp prop;
                 CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
                 dev_ctx->description = prop.name;
-
-                #if !defined(GGML_USE_HIP)
-                char id[64];
-                snprintf(id, sizeof(id),
-                    "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
-                    (unsigned char)prop.uuid.bytes[0],
-                    (unsigned char)prop.uuid.bytes[1],
-                    (unsigned char)prop.uuid.bytes[2],
-                    (unsigned char)prop.uuid.bytes[3],
-                    (unsigned char)prop.uuid.bytes[4],
-                    (unsigned char)prop.uuid.bytes[5],
-                    (unsigned char)prop.uuid.bytes[6],
-                    (unsigned char)prop.uuid.bytes[7],
-                    (unsigned char)prop.uuid.bytes[8],
-                    (unsigned char)prop.uuid.bytes[9],
-                    (unsigned char)prop.uuid.bytes[10],
-                    (unsigned char)prop.uuid.bytes[11],
-                    (unsigned char)prop.uuid.bytes[12],
-                    (unsigned char)prop.uuid.bytes[13],
-                    (unsigned char)prop.uuid.bytes[14],
-                    (unsigned char)prop.uuid.bytes[15]
-                  );
-                dev_ctx->id = id;
-                #else
-                #ifdef _WIN32
-                char id[16];
-                snprintf(id, sizeof(id), "%d", i);
-                dev_ctx->id = id;
-                #else
-                dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16);
-                #endif
-                #endif
+                dev_ctx->id = ggml_cuda_parse_uuid(prop, i);
 
                 ggml_backend_dev_t dev = new ggml_backend_device {
                     /* .iface   = */ ggml_backend_cuda_device_interface,