From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Viraj Wadhwa Date: Tue, 4 Nov 2025 12:04:04 -0800 Subject: [PATCH] Add memory detection using DXGI + PDH --- ggml/src/CMakeLists.txt | 1 + ggml/src/ggml-impl.h | 3 + ggml/src/ggml-vulkan/ggml-vulkan.cpp | 29 ++- ggml/src/mem_dxgi_pdh.cpp | 297 +++++++++++++++++++++++++++ 4 files changed, 327 insertions(+), 3 deletions(-) create mode 100644 ggml/src/mem_dxgi_pdh.cpp diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index aefe43bdd..21fe4640c 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -211,6 +211,7 @@ add_library(ggml-base ggml-quants.h mem_hip.cpp mem_nvml.cpp + mem_dxgi_pdh.cpp gguf.cpp) target_include_directories(ggml-base PRIVATE .) diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h index b63edd0c1..81cad8cf3 100644 --- a/ggml/src/ggml-impl.h +++ b/ggml/src/ggml-impl.h @@ -645,6 +645,9 @@ GGML_API void ggml_nvml_release(); GGML_API int ggml_hip_mgmt_init(); GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total); GGML_API void ggml_hip_mgmt_release(); +GGML_API int ggml_dxgi_pdh_init(); +GGML_API int ggml_dxgi_pdh_get_device_memory(const char* luid, size_t *free, size_t *total, bool is_integrated_gpu); +GGML_API void ggml_dxgi_pdh_release(); #ifdef __cplusplus } diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 7cfb14a54..a1c46d0b3 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -73,6 +73,7 @@ DispatchLoaderDynamic & ggml_vk_default_dispatcher(); #define VK_KHR_SHADER_BFLOAT16_EXTENSION_NAME "VK_KHR_shader_bfloat16" #define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_BFLOAT16_FEATURES_KHR ((VkStructureType)1000141000) #define VK_COMPONENT_TYPE_BFLOAT16_KHR ((VkComponentTypeKHR)1000141000) +#define VK_LUID_SIZE_KHR VK_LUID_SIZE typedef struct VkPhysicalDeviceShaderBfloat16FeaturesKHR { VkStructureType sType; @@ -12433,6 +12434,7 @@ struct ggml_backend_vk_device_context { std::string pci_id; std::string id; std::string uuid; + std::string luid; int major; int minor; int driver_major; @@ -12448,8 +12450,22 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size vk::PhysicalDeviceMemoryProperties memprops = vkdev.getMemoryProperties(); vk::PhysicalDeviceProperties2 props2; vkdev.getProperties2(&props2); + GGML_LOG_DEBUG("ggml_backend_vk_get_device_memory called: uuid %s\n", ctx->uuid.c_str()); + GGML_LOG_DEBUG("ggml_backend_vk_get_device_memory called: luid %s\n", ctx->luid.c_str()); + + // Check VRAM reporting for Windows IGPU/DGPU using DXGI + PDH (vendor agnostic) + if (ggml_dxgi_pdh_init() == 0) { + GGML_LOG_DEBUG("DXGI + PDH Initialized. Getting GPU free memory info\n"); + int status = ggml_dxgi_pdh_get_device_memory(ctx->luid.c_str(), free, total, ctx->is_integrated_gpu); + if (status == 0) { + GGML_LOG_DEBUG("%s utilizing DXGI + PDH memory reporting free: %zu total: %zu\n", __func__, *free, *total); + ggml_dxgi_pdh_release(); + return; + } + ggml_dxgi_pdh_release(); + } - if (!ctx->is_integrated_gpu) + if (!ctx->is_integrated_gpu) { // Use vendor specific management libraries for best VRAM reporting if available switch (props2.properties.vendorID) { @@ -12477,8 +12493,8 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size break; } } - // else fallback to memory budget if supported + // else fallback to memory budget if supported *total = 0; *free = 0; vk::PhysicalDeviceMemoryBudgetPropertiesEXT mem_budget_props; @@ -13089,7 +13105,6 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, /* .reg = */ reg, /* .context = */ ctx, }); - // Gather additional information about the device int dev_idx = vk_instance.device_indices[i]; vk::PhysicalDeviceProperties props1; @@ -13112,6 +13127,14 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, } } ctx->uuid = oss.str(); + const auto& luid = device_id_props.deviceLUID; + char luid_str[32]; // "0x" + 16 hex digits + null terminator = 19 chars + snprintf(luid_str, sizeof(luid_str), // high part + low part + "0x%02x%02x%02x%02x%02x%02x%02x%02x", + luid[7], luid[6], luid[5], luid[4], + luid[3], luid[2], luid[1], luid[0] + ); + ctx->luid = std::string(luid_str); ctx->major = 0; ctx->minor = 0; // TODO regex parse driver_props.driverInfo for a X.Y or X.Y.Z version string diff --git a/ggml/src/mem_dxgi_pdh.cpp b/ggml/src/mem_dxgi_pdh.cpp new file mode 100644 index 000000000..2f395761c --- /dev/null +++ b/ggml/src/mem_dxgi_pdh.cpp @@ -0,0 +1,297 @@ +// DXGI and PDH Performance Counters Library +// This Windows-only (10/11) library provides accurate VRAM reporting +#include "ggml.h" +#include "ggml-impl.h" + +#ifdef _WIN32 +# define WIN32_LEAN_AND_MEAN +# ifndef NOMINMAX +# define NOMINMAX +# endif +#include +#include +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; + +static std::mutex ggml_dxgi_pdh_lock; + +/* +Struct to keep track of GPU adapter information at runtime +*/ +struct GpuInfo { + std::wstring description; // debug field + LUID luid; + std::wstring pdhInstance; + double dedicatedTotal = 0.0; + double sharedTotal = 0.0; + double dedicatedUsage = 0.0; + double sharedUsage = 0.0; +}; + +/* +DLL Function Pointers +*/ +struct { + void *dxgi_dll_handle; + void *pdh_dll_handle; + // DXGI Functions + HRESULT (*CreateDXGIFactory1)(REFIID riid, void **ppFactory); + // PDH functions + PDH_STATUS (*PdhOpenQueryW)(LPCWSTR szDataSource, DWORD_PTR dwUserData, PDH_HQUERY *phQuery); + PDH_STATUS (*PdhAddCounterW)(PDH_HQUERY hQuery, LPCWSTR szFullCounterPath, DWORD_PTR dwUserData, PDH_HCOUNTER *phCounter); + PDH_STATUS (*PdhCollectQueryData)(PDH_HQUERY hQuery); + PDH_STATUS (*PdhGetFormattedCounterValue)(PDH_HCOUNTER hCounter, DWORD dwFormat, LPDWORD lpdwType, PPDH_FMT_COUNTERVALUE pValue); + PDH_STATUS (*PdhCloseQuery)(PDH_HQUERY hQuery); +} dll_functions { + nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr +}; + +/* +Create a PDH Instance name +*/ +static std::wstring generate_pdh_instance_name_from_luid(const LUID& luid) { + std::wstringstream ss; + ss << L"luid_0x" << std::hex << std::setw(8) << std::setfill(L'0') << std::uppercase << luid.HighPart + << L"_0x" << std::setw(8) << std::setfill(L'0') << luid.LowPart; + return ss.str(); +} + +/* +Conversion from Bytes to GigaBytes +*/ +template +static inline double b_to_gb(T n) +{ + return (double(n) / (1024.0 * 1024 * 1024)); +} + +/* +Fetch the GPU adapter 'dedicated memory' and 'shared memory' using DXGI +*/ +static void fetch_dxgi_adapter_desc1(const DXGI_ADAPTER_DESC1& desc, GpuInfo* info) { + auto dedicatedVideoMemory = desc.DedicatedVideoMemory; + auto sharedSystemMemory = desc.SharedSystemMemory; + GGML_LOG_DEBUG("[DXGI] Adapter Description: %ls, LUID: 0x%08X%08X, Dedicated: %.2f GB, Shared: %.2f GB\n", desc.Description, desc.AdapterLuid.HighPart, desc.AdapterLuid.LowPart, b_to_gb(dedicatedVideoMemory), b_to_gb(sharedSystemMemory)); + if (info) { + info->dedicatedTotal = dedicatedVideoMemory; // values in bytes + info->sharedTotal = sharedSystemMemory; + } +} + +/* +Enumerate over the GPU adapters detected using DXGI and return their information +*/ +static std::vector get_dxgi_gpu_infos() { + std::vector infos; + IDXGIFactory1* pFactory = nullptr; + + if (SUCCEEDED(dll_functions.CreateDXGIFactory1(__uuidof(IDXGIFactory1), (void**)&pFactory))) { + UINT i = 0; + IDXGIAdapter1* pAdapter = nullptr; + while (pFactory->EnumAdapters1(i, &pAdapter) != DXGI_ERROR_NOT_FOUND) { + DXGI_ADAPTER_DESC1 desc; + pAdapter->GetDesc1(&desc); + + // Get all the GPU adapter info + GpuInfo info; + fetch_dxgi_adapter_desc1(desc, &info); + info.description = std::wstring(desc.Description); + info.luid = desc.AdapterLuid; + info.pdhInstance = generate_pdh_instance_name_from_luid(desc.AdapterLuid); + infos.push_back(info); + + pAdapter->Release(); + ++i; + } + pFactory->Release(); + } + return infos; +} + +static bool get_gpu_memory_usage(GpuInfo& gpu) { + PDH_HQUERY query; + if (dll_functions.PdhOpenQueryW(NULL, 0, &query) != ERROR_SUCCESS) { + return false; + } + + struct GpuCounters { + PDH_HCOUNTER dedicated; + PDH_HCOUNTER shared; + }; + + GpuCounters gpuCounter{}; + + std::wstring dedicatedPath = L"\\GPU Adapter Memory(" + gpu.pdhInstance + L"*)\\Dedicated Usage"; + std::wstring sharedPath = L"\\GPU Adapter Memory(" + gpu.pdhInstance + L"*)\\Shared Usage"; + + if (dll_functions.PdhAddCounterW(query, dedicatedPath.c_str(), 0, &gpuCounter.dedicated) != ERROR_SUCCESS || + dll_functions.PdhAddCounterW(query, sharedPath.c_str(), 0, &gpuCounter.shared) != ERROR_SUCCESS) { + GGML_LOG_ERROR("Failed to add PDH counters for GPU %s\n", std::string(gpu.pdhInstance.begin(), gpu.pdhInstance.end()).c_str()); + dll_functions.PdhCloseQuery(query); + return false; + } + + // Sample the data + if (dll_functions.PdhCollectQueryData(query) != ERROR_SUCCESS) { + dll_functions.PdhCloseQuery(query); + return false; + } + + // Read final values + PDH_FMT_COUNTERVALUE val; + + if (dll_functions.PdhGetFormattedCounterValue(gpuCounter.dedicated, PDH_FMT_DOUBLE, NULL, &val) == ERROR_SUCCESS) + gpu.dedicatedUsage = val.doubleValue; + + if (dll_functions.PdhGetFormattedCounterValue(gpuCounter.shared, PDH_FMT_DOUBLE, NULL, &val) == ERROR_SUCCESS) + gpu.sharedUsage = val.doubleValue; + + dll_functions.PdhCloseQuery(query); + return true; +} + + +extern "C" { + + int ggml_dxgi_pdh_init() { + GGML_LOG_DEBUG("%s called\n", __func__); + std::lock_guard lock(ggml_dxgi_pdh_lock); + if (dll_functions.dxgi_dll_handle != NULL && dll_functions.pdh_dll_handle != NULL) { + // Already initialized as we have both DLL handles + return ERROR_SUCCESS; + } + + DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS); + SetErrorMode(old_mode | SEM_FAILCRITICALERRORS); + fs::path libPath_dxgi = fs::path("\\Windows") / fs::path("System32") / fs::path("dxgi.dll"); + fs::path libPath_pdh = fs::path("\\Windows") / fs::path("System32") / fs::path("pdh.dll"); + + // Call LoadLibraryW on both DLLs to ensure they are loaded + void *dxgi = (void*)LoadLibraryW(libPath_dxgi.wstring().c_str()); + void *pdh = (void*)LoadLibraryW(libPath_pdh.wstring().c_str()); + if(dxgi == NULL || pdh == NULL) { + if (dxgi != NULL) { + FreeLibrary((HMODULE)(dxgi)); + } + if (pdh != NULL) { + FreeLibrary((HMODULE)(pdh)); + } + SetErrorMode(old_mode); + return ERROR_DLL_NOT_FOUND; + } + else { + // save the dll handles + dll_functions.dxgi_dll_handle = dxgi; + dll_functions.pdh_dll_handle = pdh; + } + + // Get pointers to the library functions loaded by the DLLs + dll_functions.CreateDXGIFactory1 = (HRESULT (*)(REFIID riid, void **ppFactory)) GetProcAddress((HMODULE)(dll_functions.dxgi_dll_handle), "CreateDXGIFactory1"); + dll_functions.PdhOpenQueryW = (PDH_STATUS (*)(LPCWSTR szDataSource, DWORD_PTR dwUserData, PDH_HQUERY *phQuery)) GetProcAddress((HMODULE)(dll_functions.pdh_dll_handle), "PdhOpenQueryW"); + dll_functions.PdhAddCounterW = (PDH_STATUS (*)(PDH_HQUERY hQuery, LPCWSTR szFullCounterPath, DWORD_PTR dwUserData, PDH_HCOUNTER *phCounter)) GetProcAddress((HMODULE)(dll_functions.pdh_dll_handle), "PdhAddCounterW"); + dll_functions.PdhCollectQueryData = (PDH_STATUS (*)(PDH_HQUERY hQuery)) GetProcAddress((HMODULE)(dll_functions.pdh_dll_handle), "PdhCollectQueryData"); + dll_functions.PdhGetFormattedCounterValue = (PDH_STATUS (*)(PDH_HCOUNTER hCounter, DWORD dwFormat, LPDWORD lpdwType, PPDH_FMT_COUNTERVALUE pValue)) GetProcAddress((HMODULE)(dll_functions.pdh_dll_handle), "PdhGetFormattedCounterValue"); + dll_functions.PdhCloseQuery = (PDH_STATUS (*)(PDH_HQUERY hQuery)) GetProcAddress((HMODULE)(dll_functions.pdh_dll_handle), "PdhCloseQuery"); + + SetErrorMode(old_mode); // set old mode before any return + + // Check if any function pointers are NULL (not found) + if (dll_functions.CreateDXGIFactory1 == NULL || dll_functions.PdhOpenQueryW == NULL || dll_functions.PdhAddCounterW == NULL || dll_functions.PdhCollectQueryData == NULL || dll_functions.PdhGetFormattedCounterValue == NULL || dll_functions.PdhCloseQuery == NULL) { + GGML_LOG_INFO("%s unable to locate required symbols in either dxgi.dll or pdh.dll", __func__); + FreeLibrary((HMODULE)(dll_functions.dxgi_dll_handle)); + FreeLibrary((HMODULE)(dll_functions.pdh_dll_handle)); + dll_functions.dxgi_dll_handle = NULL; + dll_functions.pdh_dll_handle = NULL; + return ERROR_PROC_NOT_FOUND; + } + + // No other initializations needed, successfully loaded the libraries and functions! + return ERROR_SUCCESS; + } + + void ggml_dxgi_pdh_release() { + std::lock_guard lock(ggml_dxgi_pdh_lock); + if (dll_functions.dxgi_dll_handle == NULL && dll_functions.pdh_dll_handle == NULL) { + // Already freed the DLLs + return; + } + + // Call FreeLibrary on both DLLs + FreeLibrary((HMODULE)(dll_functions.dxgi_dll_handle)); + FreeLibrary((HMODULE)(dll_functions.pdh_dll_handle)); + + dll_functions.dxgi_dll_handle = NULL; + dll_functions.pdh_dll_handle = NULL; + + return; // successfully released + } + + int ggml_dxgi_pdh_get_device_memory(const char* luid, size_t *free, size_t *total, bool is_integrated_gpu) { + + std::lock_guard lock(ggml_dxgi_pdh_lock); + + // Enumerate GPUs using DXGI and find the matching LUID + // This also fetches the total memory info for each of the enumerated GPUs + std::vector gpus = get_dxgi_gpu_infos(); + GpuInfo *targetGpu = nullptr; + for (auto& gpu : gpus) { + char luid_buffer[32]; // "0x" + 16 hex digits + null terminator + snprintf(luid_buffer, sizeof(luid_buffer), "0x%08x%08x", gpu.luid.HighPart, gpu.luid.LowPart); + std::string gpu_luid_str(luid_buffer); + if (gpu_luid_str == std::string(luid)) { + targetGpu = &gpu; + break; + } + } + if (!targetGpu) { + GGML_LOG_ERROR("GPU with LUID %s not found.\n", luid); + return ERROR_NOT_FOUND; + } + + // Get the current memory usage for the target GPU + int status = get_gpu_memory_usage(*targetGpu); + if (!status) { + GGML_LOG_ERROR("Failed to get GPU memory usage.\n"); + return ERROR_DEVICE_NOT_AVAILABLE; + } + + // Calculate the free memory based on whether it's an integrated or discrete GPU + if (is_integrated_gpu) { + // IGPU free = SharedTotal - SharedUsage + GGML_LOG_DEBUG("Integrated GPU (%ls) with LUID %s detected. Shared Total: %.2f bytes (%.2f GB), Shared Usage: %.2f bytes (%.2f GB), Dedicated Total: %.2f bytes (%.2f GB), Dedicated Usage: %.2f bytes (%.2f GB)\n", targetGpu->description.c_str(), luid, targetGpu->sharedTotal, b_to_gb(targetGpu->sharedTotal), targetGpu->sharedUsage, b_to_gb(targetGpu->sharedUsage), targetGpu->dedicatedTotal, b_to_gb(targetGpu->dedicatedTotal), targetGpu->dedicatedUsage, b_to_gb(targetGpu->dedicatedUsage)); + *free = (targetGpu->sharedTotal - targetGpu->sharedUsage) + (targetGpu->dedicatedTotal - targetGpu->dedicatedUsage); // Some IGPUs also have dedicated memory, which can be used along with the IGPU's shared memory + *total = targetGpu->sharedTotal + targetGpu->dedicatedTotal; + } + else { + // DGPU free = DedicatedTotal - DedicatedUsage + GGML_LOG_DEBUG("Discrete GPU (%ls) with LUID %s detected. Dedicated Total: %.2f bytes (%.2f GB), Dedicated Usage: %.2f bytes (%.2f GB)\n", targetGpu->description.c_str(), luid, targetGpu->dedicatedTotal, b_to_gb(targetGpu->dedicatedTotal), targetGpu->dedicatedUsage, b_to_gb(targetGpu->dedicatedUsage)); + *free = targetGpu->dedicatedTotal - targetGpu->dedicatedUsage; + *total = targetGpu->dedicatedTotal; + } + + return ERROR_SUCCESS; + } + +} // extern "C" + +#else // #ifdef _WIN32 + +extern "C" { + + // DXGI + PDH not available for Linux implementation + int ggml_dxgi_pdh_init() { + return -1; + } + void ggml_dxgi_pdh_release() {} + int ggml_dxgi_pdh_get_device_memory(const char* luid, size_t *free, size_t *total, bool is_integrated_gpu) { + return -1; + } + +} // extern "C" + +#endif // #ifdef _WIN32 \ No newline at end of file