mirror of
https://github.com/ollama/ollama.git
synced 2025-08-25 20:51:11 +02:00
ggml: Use ordinal IDs for AMD GPUs on Linux when UUID is unavailable
Some AMD GPUs do not provide UUIDs and report only "XX". In these cases, we should use the ordinal ID as an alternate identifier. This is the same as we always need to do on Windows for AMD. In addition, this prints out the ID for each GPU when enumerating them for easier debugging in the future.
This commit is contained in:
92
ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
vendored
92
ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
vendored
@@ -175,6 +175,51 @@ static int ggml_cuda_parse_id(char devName[]) {
|
||||
}
|
||||
#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
|
||||
|
||||
static std::string ggml_cuda_parse_uuid(cudaDeviceProp prop, int device_num) {
|
||||
char id[64];
|
||||
|
||||
#if !defined(GGML_USE_HIP)
|
||||
snprintf(id, sizeof(id),
|
||||
"GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
|
||||
(unsigned char)prop.uuid.bytes[0],
|
||||
(unsigned char)prop.uuid.bytes[1],
|
||||
(unsigned char)prop.uuid.bytes[2],
|
||||
(unsigned char)prop.uuid.bytes[3],
|
||||
(unsigned char)prop.uuid.bytes[4],
|
||||
(unsigned char)prop.uuid.bytes[5],
|
||||
(unsigned char)prop.uuid.bytes[6],
|
||||
(unsigned char)prop.uuid.bytes[7],
|
||||
(unsigned char)prop.uuid.bytes[8],
|
||||
(unsigned char)prop.uuid.bytes[9],
|
||||
(unsigned char)prop.uuid.bytes[10],
|
||||
(unsigned char)prop.uuid.bytes[11],
|
||||
(unsigned char)prop.uuid.bytes[12],
|
||||
(unsigned char)prop.uuid.bytes[13],
|
||||
(unsigned char)prop.uuid.bytes[14],
|
||||
(unsigned char)prop.uuid.bytes[15]
|
||||
);
|
||||
#else
|
||||
#ifdef _WIN32
|
||||
snprintf(id, sizeof(id), "%d", device_num);
|
||||
#else
|
||||
try {
|
||||
std::string uuid = std::string(prop.uuid.bytes, 16);
|
||||
|
||||
size_t pos = 0;
|
||||
unsigned long long v = stoull(uuid, &pos, 16);
|
||||
if (v == 0 || pos != uuid.size() || (!uuid.empty() && uuid[0] == '-'))
|
||||
throw std::invalid_argument("invalid uuid");
|
||||
|
||||
snprintf(id, sizeof(id), "GPU-%016llx", v);
|
||||
} catch (const std::exception &e) {
|
||||
snprintf(id, sizeof(id), "%d", device_num);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
static ggml_cuda_device_info ggml_cuda_init() {
|
||||
#ifdef __HIP_PLATFORM_AMD__
|
||||
// Workaround for a rocBLAS bug when using multiple graphics cards:
|
||||
@@ -263,22 +308,24 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
||||
info.devices[id].cc += prop.minor * 0x10;
|
||||
}
|
||||
}
|
||||
GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d\n",
|
||||
GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d, ID: %s\n",
|
||||
id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff,
|
||||
device_vmm ? "yes" : "no", prop.warpSize);
|
||||
device_vmm ? "yes" : "no", prop.warpSize, ggml_cuda_parse_uuid(prop, id).c_str());
|
||||
#elif defined(GGML_USE_MUSA)
|
||||
// FIXME: Ensure compatibility with varying warp sizes across different MUSA archs.
|
||||
info.devices[id].warp_size = 32;
|
||||
info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
|
||||
info.devices[id].cc = GGML_CUDA_CC_OFFSET_MTHREADS + prop.major * 0x100;
|
||||
info.devices[id].cc += prop.minor * 0x10;
|
||||
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n",
|
||||
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
|
||||
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
|
||||
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
|
||||
ggml_cuda_parse_uuid(prop, id).c_str());
|
||||
#else
|
||||
info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
|
||||
info.devices[id].cc = 100*prop.major + 10*prop.minor;
|
||||
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n",
|
||||
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
|
||||
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
|
||||
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
|
||||
ggml_cuda_parse_uuid(prop, id).c_str());
|
||||
#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
|
||||
}
|
||||
|
||||
@@ -3475,38 +3522,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
|
||||
cudaDeviceProp prop;
|
||||
CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
|
||||
dev_ctx->description = prop.name;
|
||||
|
||||
#if !defined(GGML_USE_HIP)
|
||||
char id[64];
|
||||
snprintf(id, sizeof(id),
|
||||
"GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
|
||||
(unsigned char)prop.uuid.bytes[0],
|
||||
(unsigned char)prop.uuid.bytes[1],
|
||||
(unsigned char)prop.uuid.bytes[2],
|
||||
(unsigned char)prop.uuid.bytes[3],
|
||||
(unsigned char)prop.uuid.bytes[4],
|
||||
(unsigned char)prop.uuid.bytes[5],
|
||||
(unsigned char)prop.uuid.bytes[6],
|
||||
(unsigned char)prop.uuid.bytes[7],
|
||||
(unsigned char)prop.uuid.bytes[8],
|
||||
(unsigned char)prop.uuid.bytes[9],
|
||||
(unsigned char)prop.uuid.bytes[10],
|
||||
(unsigned char)prop.uuid.bytes[11],
|
||||
(unsigned char)prop.uuid.bytes[12],
|
||||
(unsigned char)prop.uuid.bytes[13],
|
||||
(unsigned char)prop.uuid.bytes[14],
|
||||
(unsigned char)prop.uuid.bytes[15]
|
||||
);
|
||||
dev_ctx->id = id;
|
||||
#else
|
||||
#ifdef _WIN32
|
||||
char id[16];
|
||||
snprintf(id, sizeof(id), "%d", i);
|
||||
dev_ctx->id = id;
|
||||
#else
|
||||
dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16);
|
||||
#endif
|
||||
#endif
|
||||
dev_ctx->id = ggml_cuda_parse_uuid(prop, i);
|
||||
|
||||
ggml_backend_dev_t dev = new ggml_backend_device {
|
||||
/* .iface = */ ggml_backend_cuda_device_interface,
|
||||
|
Reference in New Issue
Block a user