mirror of
https://github.com/ollama/ollama.git
synced 2025-08-25 08:51:36 +02:00
ggml: Use ordinal IDs for AMD GPUs on Linux when UUID is unavailable
Some AMD GPUs do not provide UUIDs and report only "XX". In these cases, we should use the ordinal ID as an alternate identifier. This is the same as we always need to do on Windows for AMD. In addition, this prints out the ID for each GPU when enumerating them for easier debugging in the future.
This commit is contained in:
@@ -7,12 +7,12 @@ This enables matching up devices and information reported by the backend
|
||||
with tools (e.g. nvidia-smi) and system management libraries (e.g. nvml).
|
||||
---
|
||||
ggml/include/ggml-backend.h | 1 +
|
||||
ggml/src/ggml-cuda/ggml-cuda.cu | 39 ++++++++++++++++++++++++++++++++
|
||||
ggml/src/ggml-cuda/ggml-cuda.cu | 67 +++++++++++++++++++++++++++++---
|
||||
ggml/src/ggml-metal/ggml-metal.m | 1 +
|
||||
3 files changed, 41 insertions(+)
|
||||
3 files changed, 63 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
|
||||
index 74e46716..48839339 100644
|
||||
index 74e467163..48839339d 100644
|
||||
--- a/ggml/include/ggml-backend.h
|
||||
+++ b/ggml/include/ggml-backend.h
|
||||
@@ -152,6 +152,7 @@ extern "C" {
|
||||
@@ -24,10 +24,93 @@ index 74e46716..48839339 100644
|
||||
size_t memory_total;
|
||||
enum ggml_backend_dev_type type;
|
||||
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||
index cb0d8528..d6960174 100644
|
||||
index cb0d8528d..1492368de 100644
|
||||
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||
@@ -2884,6 +2884,7 @@ struct ggml_backend_cuda_device_context {
|
||||
@@ -173,6 +173,51 @@ static int ggml_cuda_parse_id(char devName[]) {
|
||||
}
|
||||
#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
|
||||
|
||||
+static std::string ggml_cuda_parse_uuid(cudaDeviceProp prop, int device_num) {
|
||||
+ char id[64];
|
||||
+
|
||||
+ #if !defined(GGML_USE_HIP)
|
||||
+ snprintf(id, sizeof(id),
|
||||
+ "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
|
||||
+ (unsigned char)prop.uuid.bytes[0],
|
||||
+ (unsigned char)prop.uuid.bytes[1],
|
||||
+ (unsigned char)prop.uuid.bytes[2],
|
||||
+ (unsigned char)prop.uuid.bytes[3],
|
||||
+ (unsigned char)prop.uuid.bytes[4],
|
||||
+ (unsigned char)prop.uuid.bytes[5],
|
||||
+ (unsigned char)prop.uuid.bytes[6],
|
||||
+ (unsigned char)prop.uuid.bytes[7],
|
||||
+ (unsigned char)prop.uuid.bytes[8],
|
||||
+ (unsigned char)prop.uuid.bytes[9],
|
||||
+ (unsigned char)prop.uuid.bytes[10],
|
||||
+ (unsigned char)prop.uuid.bytes[11],
|
||||
+ (unsigned char)prop.uuid.bytes[12],
|
||||
+ (unsigned char)prop.uuid.bytes[13],
|
||||
+ (unsigned char)prop.uuid.bytes[14],
|
||||
+ (unsigned char)prop.uuid.bytes[15]
|
||||
+ );
|
||||
+ #else
|
||||
+ #ifdef _WIN32
|
||||
+ snprintf(id, sizeof(id), "%d", device_num);
|
||||
+ #else
|
||||
+ try {
|
||||
+ std::string uuid = std::string(prop.uuid.bytes, 16);
|
||||
+
|
||||
+ size_t pos = 0;
|
||||
+ unsigned long long v = stoull(uuid, &pos, 16);
|
||||
+ if (v == 0 || pos != uuid.size() || (!uuid.empty() && uuid[0] == '-'))
|
||||
+ throw std::invalid_argument("invalid uuid");
|
||||
+
|
||||
+ snprintf(id, sizeof(id), "GPU-%016llx", v);
|
||||
+ } catch (const std::exception &e) {
|
||||
+ snprintf(id, sizeof(id), "%d", device_num);
|
||||
+ }
|
||||
+ #endif
|
||||
+ #endif
|
||||
+
|
||||
+ return id;
|
||||
+}
|
||||
+
|
||||
static ggml_cuda_device_info ggml_cuda_init() {
|
||||
#ifdef __HIP_PLATFORM_AMD__
|
||||
// Workaround for a rocBLAS bug when using multiple graphics cards:
|
||||
@@ -261,22 +306,24 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
||||
info.devices[id].cc += prop.minor * 0x10;
|
||||
}
|
||||
}
|
||||
- GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d\n",
|
||||
+ GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d, ID: %s\n",
|
||||
id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff,
|
||||
- device_vmm ? "yes" : "no", prop.warpSize);
|
||||
+ device_vmm ? "yes" : "no", prop.warpSize, ggml_cuda_parse_uuid(prop, id).c_str());
|
||||
#elif defined(GGML_USE_MUSA)
|
||||
// FIXME: Ensure compatibility with varying warp sizes across different MUSA archs.
|
||||
info.devices[id].warp_size = 32;
|
||||
info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
|
||||
info.devices[id].cc = GGML_CUDA_CC_OFFSET_MTHREADS + prop.major * 0x100;
|
||||
info.devices[id].cc += prop.minor * 0x10;
|
||||
- GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n",
|
||||
- id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
|
||||
+ GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
|
||||
+ id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
|
||||
+ ggml_cuda_parse_uuid(prop, id).c_str());
|
||||
#else
|
||||
info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
|
||||
info.devices[id].cc = 100*prop.major + 10*prop.minor;
|
||||
- GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n",
|
||||
- id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
|
||||
+ GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
|
||||
+ id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
|
||||
+ ggml_cuda_parse_uuid(prop, id).c_str());
|
||||
#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
|
||||
}
|
||||
|
||||
@@ -2884,6 +2931,7 @@ struct ggml_backend_cuda_device_context {
|
||||
int device;
|
||||
std::string name;
|
||||
std::string description;
|
||||
@@ -35,7 +118,7 @@ index cb0d8528..d6960174 100644
|
||||
};
|
||||
|
||||
static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
|
||||
@@ -2896,6 +2897,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
|
||||
@@ -2896,6 +2944,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
|
||||
return ctx->description.c_str();
|
||||
}
|
||||
|
||||
@@ -47,7 +130,7 @@ index cb0d8528..d6960174 100644
|
||||
static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
|
||||
ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
|
||||
ggml_cuda_set_device(ctx->device);
|
||||
@@ -2910,6 +2916,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
|
||||
@@ -2910,6 +2963,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
|
||||
static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
|
||||
props->name = ggml_backend_cuda_device_get_name(dev);
|
||||
props->description = ggml_backend_cuda_device_get_description(dev);
|
||||
@@ -55,47 +138,16 @@ index cb0d8528..d6960174 100644
|
||||
props->type = ggml_backend_cuda_device_get_type(dev);
|
||||
ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);
|
||||
|
||||
@@ -3458,6 +3465,38 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
|
||||
@@ -3457,6 +3511,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
|
||||
cudaDeviceProp prop;
|
||||
CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
|
||||
dev_ctx->description = prop.name;
|
||||
+ dev_ctx->id = ggml_cuda_parse_uuid(prop, i);
|
||||
|
||||
+ #if !defined(GGML_USE_HIP)
|
||||
+ char id[64];
|
||||
+ snprintf(id, sizeof(id),
|
||||
+ "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
|
||||
+ (unsigned char)prop.uuid.bytes[0],
|
||||
+ (unsigned char)prop.uuid.bytes[1],
|
||||
+ (unsigned char)prop.uuid.bytes[2],
|
||||
+ (unsigned char)prop.uuid.bytes[3],
|
||||
+ (unsigned char)prop.uuid.bytes[4],
|
||||
+ (unsigned char)prop.uuid.bytes[5],
|
||||
+ (unsigned char)prop.uuid.bytes[6],
|
||||
+ (unsigned char)prop.uuid.bytes[7],
|
||||
+ (unsigned char)prop.uuid.bytes[8],
|
||||
+ (unsigned char)prop.uuid.bytes[9],
|
||||
+ (unsigned char)prop.uuid.bytes[10],
|
||||
+ (unsigned char)prop.uuid.bytes[11],
|
||||
+ (unsigned char)prop.uuid.bytes[12],
|
||||
+ (unsigned char)prop.uuid.bytes[13],
|
||||
+ (unsigned char)prop.uuid.bytes[14],
|
||||
+ (unsigned char)prop.uuid.bytes[15]
|
||||
+ );
|
||||
+ dev_ctx->id = id;
|
||||
+ #else
|
||||
+ #ifdef _WIN32
|
||||
+ char id[16];
|
||||
+ snprintf(id, sizeof(id), "%d", i);
|
||||
+ dev_ctx->id = id;
|
||||
+ #else
|
||||
+ dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16);
|
||||
+ #endif
|
||||
+ #endif
|
||||
+
|
||||
ggml_backend_dev_t dev = new ggml_backend_device {
|
||||
/* .iface = */ ggml_backend_cuda_device_interface,
|
||||
/* .reg = */ ®,
|
||||
diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
|
||||
index 1b56f858..a9eeebc6 100644
|
||||
index 1b56f858c..a9eeebc6a 100644
|
||||
--- a/ggml/src/ggml-metal/ggml-metal.m
|
||||
+++ b/ggml/src/ggml-metal/ggml-metal.m
|
||||
@@ -5703,6 +5703,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
|
||||
|
92
ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
vendored
92
ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
vendored
@@ -175,6 +175,51 @@ static int ggml_cuda_parse_id(char devName[]) {
|
||||
}
|
||||
#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
|
||||
|
||||
static std::string ggml_cuda_parse_uuid(cudaDeviceProp prop, int device_num) {
|
||||
char id[64];
|
||||
|
||||
#if !defined(GGML_USE_HIP)
|
||||
snprintf(id, sizeof(id),
|
||||
"GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
|
||||
(unsigned char)prop.uuid.bytes[0],
|
||||
(unsigned char)prop.uuid.bytes[1],
|
||||
(unsigned char)prop.uuid.bytes[2],
|
||||
(unsigned char)prop.uuid.bytes[3],
|
||||
(unsigned char)prop.uuid.bytes[4],
|
||||
(unsigned char)prop.uuid.bytes[5],
|
||||
(unsigned char)prop.uuid.bytes[6],
|
||||
(unsigned char)prop.uuid.bytes[7],
|
||||
(unsigned char)prop.uuid.bytes[8],
|
||||
(unsigned char)prop.uuid.bytes[9],
|
||||
(unsigned char)prop.uuid.bytes[10],
|
||||
(unsigned char)prop.uuid.bytes[11],
|
||||
(unsigned char)prop.uuid.bytes[12],
|
||||
(unsigned char)prop.uuid.bytes[13],
|
||||
(unsigned char)prop.uuid.bytes[14],
|
||||
(unsigned char)prop.uuid.bytes[15]
|
||||
);
|
||||
#else
|
||||
#ifdef _WIN32
|
||||
snprintf(id, sizeof(id), "%d", device_num);
|
||||
#else
|
||||
try {
|
||||
std::string uuid = std::string(prop.uuid.bytes, 16);
|
||||
|
||||
size_t pos = 0;
|
||||
unsigned long long v = stoull(uuid, &pos, 16);
|
||||
if (v == 0 || pos != uuid.size() || (!uuid.empty() && uuid[0] == '-'))
|
||||
throw std::invalid_argument("invalid uuid");
|
||||
|
||||
snprintf(id, sizeof(id), "GPU-%016llx", v);
|
||||
} catch (const std::exception &e) {
|
||||
snprintf(id, sizeof(id), "%d", device_num);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
static ggml_cuda_device_info ggml_cuda_init() {
|
||||
#ifdef __HIP_PLATFORM_AMD__
|
||||
// Workaround for a rocBLAS bug when using multiple graphics cards:
|
||||
@@ -263,22 +308,24 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
||||
info.devices[id].cc += prop.minor * 0x10;
|
||||
}
|
||||
}
|
||||
GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d\n",
|
||||
GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d, ID: %s\n",
|
||||
id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff,
|
||||
device_vmm ? "yes" : "no", prop.warpSize);
|
||||
device_vmm ? "yes" : "no", prop.warpSize, ggml_cuda_parse_uuid(prop, id).c_str());
|
||||
#elif defined(GGML_USE_MUSA)
|
||||
// FIXME: Ensure compatibility with varying warp sizes across different MUSA archs.
|
||||
info.devices[id].warp_size = 32;
|
||||
info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
|
||||
info.devices[id].cc = GGML_CUDA_CC_OFFSET_MTHREADS + prop.major * 0x100;
|
||||
info.devices[id].cc += prop.minor * 0x10;
|
||||
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n",
|
||||
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
|
||||
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
|
||||
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
|
||||
ggml_cuda_parse_uuid(prop, id).c_str());
|
||||
#else
|
||||
info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
|
||||
info.devices[id].cc = 100*prop.major + 10*prop.minor;
|
||||
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n",
|
||||
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
|
||||
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
|
||||
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
|
||||
ggml_cuda_parse_uuid(prop, id).c_str());
|
||||
#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
|
||||
}
|
||||
|
||||
@@ -3475,38 +3522,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
|
||||
cudaDeviceProp prop;
|
||||
CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
|
||||
dev_ctx->description = prop.name;
|
||||
|
||||
#if !defined(GGML_USE_HIP)
|
||||
char id[64];
|
||||
snprintf(id, sizeof(id),
|
||||
"GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
|
||||
(unsigned char)prop.uuid.bytes[0],
|
||||
(unsigned char)prop.uuid.bytes[1],
|
||||
(unsigned char)prop.uuid.bytes[2],
|
||||
(unsigned char)prop.uuid.bytes[3],
|
||||
(unsigned char)prop.uuid.bytes[4],
|
||||
(unsigned char)prop.uuid.bytes[5],
|
||||
(unsigned char)prop.uuid.bytes[6],
|
||||
(unsigned char)prop.uuid.bytes[7],
|
||||
(unsigned char)prop.uuid.bytes[8],
|
||||
(unsigned char)prop.uuid.bytes[9],
|
||||
(unsigned char)prop.uuid.bytes[10],
|
||||
(unsigned char)prop.uuid.bytes[11],
|
||||
(unsigned char)prop.uuid.bytes[12],
|
||||
(unsigned char)prop.uuid.bytes[13],
|
||||
(unsigned char)prop.uuid.bytes[14],
|
||||
(unsigned char)prop.uuid.bytes[15]
|
||||
);
|
||||
dev_ctx->id = id;
|
||||
#else
|
||||
#ifdef _WIN32
|
||||
char id[16];
|
||||
snprintf(id, sizeof(id), "%d", i);
|
||||
dev_ctx->id = id;
|
||||
#else
|
||||
dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16);
|
||||
#endif
|
||||
#endif
|
||||
dev_ctx->id = ggml_cuda_parse_uuid(prop, i);
|
||||
|
||||
ggml_backend_dev_t dev = new ggml_backend_device {
|
||||
/* .iface = */ ggml_backend_cuda_device_interface,
|
||||
|
Reference in New Issue
Block a user