From 850da848c58869474c97fc5c4ae58d27f05b65a1 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Tue, 14 Oct 2025 11:26:28 -0700
Subject: [PATCH] logs: fix bogus "0 MiB free" log line (#12590)

On the llama runner, after the recent GGML bump, a new log line reports an
incorrect "0 MiB free" because our patch removes the memory information from
the props. This adjusts the llama.cpp code to fetch the actual free memory of
the active device.
---
 llama/llama.cpp/src/llama.cpp                 |  4 ++-
 ...gml-Enable-resetting-backend-devices.patch | 31 +++++++++++++++----
 2 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/llama/llama.cpp/src/llama.cpp b/llama/llama.cpp/src/llama.cpp
index fe5a7a8354..d821a96a02 100644
--- a/llama/llama.cpp/src/llama.cpp
+++ b/llama/llama.cpp/src/llama.cpp
@@ -267,10 +267,12 @@ static struct llama_model * llama_model_load_from_file_impl(
     for (auto * dev : model->devices) {
         ggml_backend_dev_props props;
         ggml_backend_dev_get_props(dev, &props);
+        size_t memory_free, memory_total;
+        ggml_backend_dev_memory(dev, &memory_free, &memory_total);
         LLAMA_LOG_INFO("%s: using device %s (%s) (%s) - %zu MiB free\n", __func__,
                 ggml_backend_dev_name(dev), ggml_backend_dev_description(dev),
                 props.device_id ? props.device_id : "unknown id",
-                props.memory_free/1024/1024);
+                memory_free/1024/1024);
     }
 
     const int status = llama_model_load(path_model, splits, *model, params);
diff --git a/llama/patches/0024-ggml-Enable-resetting-backend-devices.patch b/llama/patches/0024-ggml-Enable-resetting-backend-devices.patch
index 0df3be07fb..1cb10d93af 100644
--- a/llama/patches/0024-ggml-Enable-resetting-backend-devices.patch
+++ b/llama/patches/0024-ggml-Enable-resetting-backend-devices.patch
@@ -12,10 +12,11 @@ unused then it can be reset to free these data structures.
  ggml/src/ggml-backend.cpp        |  8 ++++++++
  ggml/src/ggml-cuda/ggml-cuda.cu  | 16 +++++++++++++++-
  ggml/src/ggml-cuda/vendors/hip.h |  1 +
- 5 files changed, 29 insertions(+), 1 deletion(-)
+ src/llama.cpp                    |  4 +++-
+ 6 files changed, 32 insertions(+), 2 deletions(-)
 
 diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
-index 1ff53ed0..ba181d09 100644
+index 1ff53ed03..ba181d09d 100644
 --- a/ggml/include/ggml-backend.h
 +++ b/ggml/include/ggml-backend.h
 @@ -178,6 +178,7 @@ extern "C" {
@@ -27,7 +28,7 @@ index 1ff53ed0..ba181d09 100644
      GGML_API ggml_backend_buffer_type_t  ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device);
      GGML_API ggml_backend_buffer_t       ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size);
 diff --git a/ggml/src/ggml-backend-impl.h b/ggml/src/ggml-backend-impl.h
-index 3c3f22fc..43c91d9f 100644
+index 3c3f22fc0..43c91d9f2 100644
 --- a/ggml/src/ggml-backend-impl.h
 +++ b/ggml/src/ggml-backend-impl.h
 @@ -195,6 +195,10 @@ extern "C" {
@@ -42,7 +43,7 @@ index 3c3f22fc..43c91d9f 100644
 
      struct ggml_backend_device {
 diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
-index 6ef5eeaf..0b757af5 100644
+index 6ef5eeafa..0b757af59 100644
 --- a/ggml/src/ggml-backend.cpp
 +++ b/ggml/src/ggml-backend.cpp
 @@ -526,6 +526,14 @@ ggml_backend_t ggml_backend_dev_init(ggml_backend_dev_t device, const char * par
@@ -61,7 +62,7 @@ index 6ef5eeaf..0b757af5 100644
      GGML_ASSERT(device);
      return device->iface.get_buffer_type(device);
 diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
-index 811462c7..87c6c34a 100644
+index 811462c79..87c6c34a4 100644
 --- a/ggml/src/ggml-cuda/ggml-cuda.cu
 +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
 @@ -107,6 +107,11 @@ int ggml_cuda_get_device() {
@@ -109,7 +110,7 @@ index 811462c7..87c6c34a 100644
 
  // backend reg
 diff --git a/ggml/src/ggml-cuda/vendors/hip.h b/ggml/src/ggml-cuda/vendors/hip.h
-index 890c1036..1f06be80 100644
+index 890c10364..1f06be80e 100644
 --- a/ggml/src/ggml-cuda/vendors/hip.h
 +++ b/ggml/src/ggml-cuda/vendors/hip.h
 @@ -45,6 +45,7 @@
@@ -120,3 +121,21 @@ index 890c1036..1f06be80 100644
  #define cudaDeviceSynchronize hipDeviceSynchronize
  #define cudaError_t hipError_t
  #define cudaErrorPeerAccessAlreadyEnabled hipErrorPeerAccessAlreadyEnabled
+diff --git a/src/llama.cpp b/src/llama.cpp
+index fe5a7a835..d821a96a0 100644
+--- a/src/llama.cpp
++++ b/src/llama.cpp
+@@ -267,10 +267,12 @@ static struct llama_model * llama_model_load_from_file_impl(
+     for (auto * dev : model->devices) {
+         ggml_backend_dev_props props;
+         ggml_backend_dev_get_props(dev, &props);
++        size_t memory_free, memory_total;
++        ggml_backend_dev_memory(dev, &memory_free, &memory_total);
+         LLAMA_LOG_INFO("%s: using device %s (%s) (%s) - %zu MiB free\n", __func__,
+                 ggml_backend_dev_name(dev), ggml_backend_dev_description(dev),
+                 props.device_id ? props.device_id : "unknown id",
+-                props.memory_free/1024/1024);
++                memory_free/1024/1024);
+     }
+ 
+     const int status = llama_model_load(path_model, splits, *model, params);
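
Note: below is a minimal, self-contained sketch of the ggml-backend calls this
patch switches to, shown outside the diff for readers who want to try the API
directly. It assumes a ggml build whose ggml-backend.h exposes
ggml_backend_load_all(), ggml_backend_dev_count() and ggml_backend_dev_get();
the program is illustrative only and is not part of the patch.

    #include <cstdio>
    #include "ggml-backend.h"

    int main() {
        // Register the available backends (CPU, CUDA, ...) so the device
        // enumeration below has something to report.
        ggml_backend_load_all();

        for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
            ggml_backend_dev_t dev = ggml_backend_dev_get(i);

            // ggml_backend_dev_memory() queries the device's current free and
            // total memory; this is what the patch now uses instead of the
            // props.memory_free field that our earlier patch removed.
            size_t memory_free = 0, memory_total = 0;
            ggml_backend_dev_memory(dev, &memory_free, &memory_total);

            printf("%s (%s): %zu MiB free of %zu MiB\n",
                   ggml_backend_dev_name(dev),
                   ggml_backend_dev_description(dev),
                   memory_free / 1024 / 1024,
                   memory_total / 1024 / 1024);
        }
        return 0;
    }

With a CUDA-enabled ggml build this should print, per device, the same free
memory figure the runner logs at model load time.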