diff --git a/llama/patches/0017-ggml-Export-GPU-UUIDs.patch b/llama/patches/0017-ggml-Export-GPU-UUIDs.patch index b7d56b0d88..2bd938a3f5 100644 --- a/llama/patches/0017-ggml-Export-GPU-UUIDs.patch +++ b/llama/patches/0017-ggml-Export-GPU-UUIDs.patch @@ -7,12 +7,12 @@ This enables matching up devices and information reported by the backend with tools (e.g. nvidia-smi) and system management libraries (e.g. nvml). --- ggml/include/ggml-backend.h | 1 + - ggml/src/ggml-cuda/ggml-cuda.cu | 39 ++++++++++++++++++++++++++++++++ + ggml/src/ggml-cuda/ggml-cuda.cu | 67 +++++++++++++++++++++++++++++--- ggml/src/ggml-metal/ggml-metal.m | 1 + - 3 files changed, 41 insertions(+) + 3 files changed, 63 insertions(+), 6 deletions(-) diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h -index 74e46716..48839339 100644 +index 74e467163..48839339d 100644 --- a/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h @@ -152,6 +152,7 @@ extern "C" { @@ -24,10 +24,93 @@ index 74e46716..48839339 100644 size_t memory_total; enum ggml_backend_dev_type type; diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu -index cb0d8528..d6960174 100644 +index cb0d8528d..1492368de 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu -@@ -2884,6 +2884,7 @@ struct ggml_backend_cuda_device_context { +@@ -173,6 +173,51 @@ static int ggml_cuda_parse_id(char devName[]) { + } + #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) + ++static std::string ggml_cuda_parse_uuid(cudaDeviceProp prop, int device_num) { ++ char id[64]; ++ ++ #if !defined(GGML_USE_HIP) ++ snprintf(id, sizeof(id), ++ "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", ++ (unsigned char)prop.uuid.bytes[0], ++ (unsigned char)prop.uuid.bytes[1], ++ (unsigned char)prop.uuid.bytes[2], ++ (unsigned char)prop.uuid.bytes[3], ++ (unsigned char)prop.uuid.bytes[4], ++ (unsigned char)prop.uuid.bytes[5], ++ (unsigned char)prop.uuid.bytes[6], ++ (unsigned char)prop.uuid.bytes[7], ++ (unsigned char)prop.uuid.bytes[8], ++ (unsigned char)prop.uuid.bytes[9], ++ (unsigned char)prop.uuid.bytes[10], ++ (unsigned char)prop.uuid.bytes[11], ++ (unsigned char)prop.uuid.bytes[12], ++ (unsigned char)prop.uuid.bytes[13], ++ (unsigned char)prop.uuid.bytes[14], ++ (unsigned char)prop.uuid.bytes[15] ++ ); ++ #else ++ #ifdef _WIN32 ++ snprintf(id, sizeof(id), "%d", device_num); ++ #else ++ try { ++ std::string uuid = std::string(prop.uuid.bytes, 16); ++ ++ size_t pos = 0; ++ unsigned long long v = stoull(uuid, &pos, 16); ++ if (v == 0 || pos != uuid.size() || (!uuid.empty() && uuid[0] == '-')) ++ throw std::invalid_argument("invalid uuid"); ++ ++ snprintf(id, sizeof(id), "GPU-%016llx", v); ++ } catch (const std::exception &e) { ++ snprintf(id, sizeof(id), "%d", device_num); ++ } ++ #endif ++ #endif ++ ++ return id; ++} ++ + static ggml_cuda_device_info ggml_cuda_init() { + #ifdef __HIP_PLATFORM_AMD__ + // Workaround for a rocBLAS bug when using multiple graphics cards: +@@ -261,22 +306,24 @@ static ggml_cuda_device_info ggml_cuda_init() { + info.devices[id].cc += prop.minor * 0x10; + } + } +- GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d\n", ++ GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d, ID: %s\n", + id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff, +- device_vmm ? "yes" : "no", prop.warpSize); ++ device_vmm ? "yes" : "no", prop.warpSize, ggml_cuda_parse_uuid(prop, id).c_str()); + #elif defined(GGML_USE_MUSA) + // FIXME: Ensure compatibility with varying warp sizes across different MUSA archs. + info.devices[id].warp_size = 32; + info.devices[id].smpbo = prop.sharedMemPerBlockOptin; + info.devices[id].cc = GGML_CUDA_CC_OFFSET_MTHREADS + prop.major * 0x100; + info.devices[id].cc += prop.minor * 0x10; +- GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n", +- id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no"); ++ GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n", ++ id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no", ++ ggml_cuda_parse_uuid(prop, id).c_str()); + #else + info.devices[id].smpbo = prop.sharedMemPerBlockOptin; + info.devices[id].cc = 100*prop.major + 10*prop.minor; +- GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n", +- id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no"); ++ GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n", ++ id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no", ++ ggml_cuda_parse_uuid(prop, id).c_str()); + #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) + } + +@@ -2884,6 +2931,7 @@ struct ggml_backend_cuda_device_context { int device; std::string name; std::string description; @@ -35,7 +118,7 @@ index cb0d8528..d6960174 100644 }; static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) { -@@ -2896,6 +2897,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t +@@ -2896,6 +2944,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t return ctx->description.c_str(); } @@ -47,7 +130,7 @@ index cb0d8528..d6960174 100644 static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context; ggml_cuda_set_device(ctx->device); -@@ -2910,6 +2916,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend +@@ -2910,6 +2963,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) { props->name = ggml_backend_cuda_device_get_name(dev); props->description = ggml_backend_cuda_device_get_description(dev); @@ -55,47 +138,16 @@ index cb0d8528..d6960174 100644 props->type = ggml_backend_cuda_device_get_type(dev); ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total); -@@ -3458,6 +3465,38 @@ ggml_backend_reg_t ggml_backend_cuda_reg() { +@@ -3457,6 +3511,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() { + cudaDeviceProp prop; CUDA_CHECK(cudaGetDeviceProperties(&prop, i)); dev_ctx->description = prop.name; ++ dev_ctx->id = ggml_cuda_parse_uuid(prop, i); -+ #if !defined(GGML_USE_HIP) -+ char id[64]; -+ snprintf(id, sizeof(id), -+ "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", -+ (unsigned char)prop.uuid.bytes[0], -+ (unsigned char)prop.uuid.bytes[1], -+ (unsigned char)prop.uuid.bytes[2], -+ (unsigned char)prop.uuid.bytes[3], -+ (unsigned char)prop.uuid.bytes[4], -+ (unsigned char)prop.uuid.bytes[5], -+ (unsigned char)prop.uuid.bytes[6], -+ (unsigned char)prop.uuid.bytes[7], -+ (unsigned char)prop.uuid.bytes[8], -+ (unsigned char)prop.uuid.bytes[9], -+ (unsigned char)prop.uuid.bytes[10], -+ (unsigned char)prop.uuid.bytes[11], -+ (unsigned char)prop.uuid.bytes[12], -+ (unsigned char)prop.uuid.bytes[13], -+ (unsigned char)prop.uuid.bytes[14], -+ (unsigned char)prop.uuid.bytes[15] -+ ); -+ dev_ctx->id = id; -+ #else -+ #ifdef _WIN32 -+ char id[16]; -+ snprintf(id, sizeof(id), "%d", i); -+ dev_ctx->id = id; -+ #else -+ dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16); -+ #endif -+ #endif -+ ggml_backend_dev_t dev = new ggml_backend_device { /* .iface = */ ggml_backend_cuda_device_interface, - /* .reg = */ ®, diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m -index 1b56f858..a9eeebc6 100644 +index 1b56f858c..a9eeebc6a 100644 --- a/ggml/src/ggml-metal/ggml-metal.m +++ b/ggml/src/ggml-metal/ggml-metal.m @@ -5703,6 +5703,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu index 080e7467b0..496973adf2 100644 --- a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu @@ -175,6 +175,51 @@ static int ggml_cuda_parse_id(char devName[]) { } #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) +static std::string ggml_cuda_parse_uuid(cudaDeviceProp prop, int device_num) { + char id[64]; + + #if !defined(GGML_USE_HIP) + snprintf(id, sizeof(id), + "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", + (unsigned char)prop.uuid.bytes[0], + (unsigned char)prop.uuid.bytes[1], + (unsigned char)prop.uuid.bytes[2], + (unsigned char)prop.uuid.bytes[3], + (unsigned char)prop.uuid.bytes[4], + (unsigned char)prop.uuid.bytes[5], + (unsigned char)prop.uuid.bytes[6], + (unsigned char)prop.uuid.bytes[7], + (unsigned char)prop.uuid.bytes[8], + (unsigned char)prop.uuid.bytes[9], + (unsigned char)prop.uuid.bytes[10], + (unsigned char)prop.uuid.bytes[11], + (unsigned char)prop.uuid.bytes[12], + (unsigned char)prop.uuid.bytes[13], + (unsigned char)prop.uuid.bytes[14], + (unsigned char)prop.uuid.bytes[15] + ); + #else + #ifdef _WIN32 + snprintf(id, sizeof(id), "%d", device_num); + #else + try { + std::string uuid = std::string(prop.uuid.bytes, 16); + + size_t pos = 0; + unsigned long long v = stoull(uuid, &pos, 16); + if (v == 0 || pos != uuid.size() || (!uuid.empty() && uuid[0] == '-')) + throw std::invalid_argument("invalid uuid"); + + snprintf(id, sizeof(id), "GPU-%016llx", v); + } catch (const std::exception &e) { + snprintf(id, sizeof(id), "%d", device_num); + } + #endif + #endif + + return id; +} + static ggml_cuda_device_info ggml_cuda_init() { #ifdef __HIP_PLATFORM_AMD__ // Workaround for a rocBLAS bug when using multiple graphics cards: @@ -263,22 +308,24 @@ static ggml_cuda_device_info ggml_cuda_init() { info.devices[id].cc += prop.minor * 0x10; } } - GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d\n", + GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d, ID: %s\n", id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff, - device_vmm ? "yes" : "no", prop.warpSize); + device_vmm ? "yes" : "no", prop.warpSize, ggml_cuda_parse_uuid(prop, id).c_str()); #elif defined(GGML_USE_MUSA) // FIXME: Ensure compatibility with varying warp sizes across different MUSA archs. info.devices[id].warp_size = 32; info.devices[id].smpbo = prop.sharedMemPerBlockOptin; info.devices[id].cc = GGML_CUDA_CC_OFFSET_MTHREADS + prop.major * 0x100; info.devices[id].cc += prop.minor * 0x10; - GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n", - id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no"); + GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n", + id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no", + ggml_cuda_parse_uuid(prop, id).c_str()); #else info.devices[id].smpbo = prop.sharedMemPerBlockOptin; info.devices[id].cc = 100*prop.major + 10*prop.minor; - GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n", - id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no"); + GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n", + id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no", + ggml_cuda_parse_uuid(prop, id).c_str()); #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) } @@ -3475,38 +3522,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() { cudaDeviceProp prop; CUDA_CHECK(cudaGetDeviceProperties(&prop, i)); dev_ctx->description = prop.name; - - #if !defined(GGML_USE_HIP) - char id[64]; - snprintf(id, sizeof(id), - "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", - (unsigned char)prop.uuid.bytes[0], - (unsigned char)prop.uuid.bytes[1], - (unsigned char)prop.uuid.bytes[2], - (unsigned char)prop.uuid.bytes[3], - (unsigned char)prop.uuid.bytes[4], - (unsigned char)prop.uuid.bytes[5], - (unsigned char)prop.uuid.bytes[6], - (unsigned char)prop.uuid.bytes[7], - (unsigned char)prop.uuid.bytes[8], - (unsigned char)prop.uuid.bytes[9], - (unsigned char)prop.uuid.bytes[10], - (unsigned char)prop.uuid.bytes[11], - (unsigned char)prop.uuid.bytes[12], - (unsigned char)prop.uuid.bytes[13], - (unsigned char)prop.uuid.bytes[14], - (unsigned char)prop.uuid.bytes[15] - ); - dev_ctx->id = id; - #else - #ifdef _WIN32 - char id[16]; - snprintf(id, sizeof(id), "%d", i); - dev_ctx->id = id; - #else - dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16); - #endif - #endif + dev_ctx->id = ggml_cuda_parse_uuid(prop, i); ggml_backend_dev_t dev = new ggml_backend_device { /* .iface = */ ggml_backend_cuda_device_interface,