mirror of https://github.com/ollama/ollama.git
logs: fix bogus "0 MiB free" log line (#12590)
On the llama runner, after the recent GGML bump, a new log line incorrectly reported "0 MiB free" because our patch removes memory reporting from the device props. This adjusts the llama.cpp code to fetch the actual free memory of the active device instead.
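For illustration, a minimal standalone sketch of the API the fix switches to (not part of the commit; assumes a ggml build with statically registered backend devices):

#include <stdio.h>
#include "ggml-backend.h"

// Query free/total memory directly from each backend device, as the fix
// does, instead of reading ggml_backend_dev_props.memory_free, which the
// props-stripping patch no longer fills in.
int main(void) {
    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
        size_t memory_free, memory_total;
        ggml_backend_dev_memory(dev, &memory_free, &memory_total);
        printf("%s (%s): %zu MiB free of %zu MiB\n",
               ggml_backend_dev_name(dev), ggml_backend_dev_description(dev),
               memory_free / 1024 / 1024, memory_total / 1024 / 1024);
    }
    return 0;
}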
llama/llama.cpp/src/llama.cpp (vendored): 4 lines changed
@@ -267,10 +267,12 @@ static struct llama_model * llama_model_load_from_file_impl(
     for (auto * dev : model->devices) {
         ggml_backend_dev_props props;
         ggml_backend_dev_get_props(dev, &props);
+        size_t memory_free, memory_total;
+        ggml_backend_dev_memory(dev, &memory_free, &memory_total);
         LLAMA_LOG_INFO("%s: using device %s (%s) (%s) - %zu MiB free\n", __func__,
                        ggml_backend_dev_name(dev), ggml_backend_dev_description(dev),
                        props.device_id ? props.device_id : "unknown id",
-                       props.memory_free/1024/1024);
+                       memory_free/1024/1024);
     }
 
     const int status = llama_model_load(path_model, splits, *model, params);
@@ -12,10 +12,11 @@ unused then it can be reset to free these data structures.
  ggml/src/ggml-backend.cpp | 8 ++++++++
  ggml/src/ggml-cuda/ggml-cuda.cu | 16 +++++++++++++++-
  ggml/src/ggml-cuda/vendors/hip.h | 1 +
- 5 files changed, 29 insertions(+), 1 deletion(-)
+ src/llama.cpp | 4 +++-
+ 6 files changed, 32 insertions(+), 2 deletions(-)
 
 diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
-index 1ff53ed0..ba181d09 100644
+index 1ff53ed03..ba181d09d 100644
 --- a/ggml/include/ggml-backend.h
 +++ b/ggml/include/ggml-backend.h
 @@ -178,6 +178,7 @@ extern "C" {
@@ -27,7 +28,7 @@ index 1ff53ed0..ba181d09 100644
     GGML_API ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device);
     GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size);
 diff --git a/ggml/src/ggml-backend-impl.h b/ggml/src/ggml-backend-impl.h
-index 3c3f22fc..43c91d9f 100644
+index 3c3f22fc0..43c91d9f2 100644
 --- a/ggml/src/ggml-backend-impl.h
 +++ b/ggml/src/ggml-backend-impl.h
 @@ -195,6 +195,10 @@ extern "C" {
@@ -42,7 +43,7 @@ index 3c3f22fc..43c91d9f 100644
 
     struct ggml_backend_device {
 diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
-index 6ef5eeaf..0b757af5 100644
+index 6ef5eeafa..0b757af59 100644
 --- a/ggml/src/ggml-backend.cpp
 +++ b/ggml/src/ggml-backend.cpp
 @@ -526,6 +526,14 @@ ggml_backend_t ggml_backend_dev_init(ggml_backend_dev_t device, const char * par
@@ -61,7 +62,7 @@ index 6ef5eeaf..0b757af5 100644
     GGML_ASSERT(device);
     return device->iface.get_buffer_type(device);
 diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
-index 811462c7..87c6c34a 100644
+index 811462c79..87c6c34a4 100644
 --- a/ggml/src/ggml-cuda/ggml-cuda.cu
 +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
 @@ -107,6 +107,11 @@ int ggml_cuda_get_device() {
@@ -109,7 +110,7 @@ index 811462c7..87c6c34a 100644
 
     // backend reg
 diff --git a/ggml/src/ggml-cuda/vendors/hip.h b/ggml/src/ggml-cuda/vendors/hip.h
-index 890c1036..1f06be80 100644
+index 890c10364..1f06be80e 100644
 --- a/ggml/src/ggml-cuda/vendors/hip.h
 +++ b/ggml/src/ggml-cuda/vendors/hip.h
 @@ -45,6 +45,7 @@
@@ -120,3 +121,21 @@ index 890c1036..1f06be80 100644
 #define cudaDeviceSynchronize hipDeviceSynchronize
 #define cudaError_t hipError_t
 #define cudaErrorPeerAccessAlreadyEnabled hipErrorPeerAccessAlreadyEnabled
+diff --git a/src/llama.cpp b/src/llama.cpp
+index fe5a7a835..d821a96a0 100644
+--- a/src/llama.cpp
++++ b/src/llama.cpp
+@@ -267,10 +267,12 @@ static struct llama_model * llama_model_load_from_file_impl(
+     for (auto * dev : model->devices) {
+         ggml_backend_dev_props props;
+         ggml_backend_dev_get_props(dev, &props);
++        size_t memory_free, memory_total;
++        ggml_backend_dev_memory(dev, &memory_free, &memory_total);
+         LLAMA_LOG_INFO("%s: using device %s (%s) (%s) - %zu MiB free\n", __func__,
+                        ggml_backend_dev_name(dev), ggml_backend_dev_description(dev),
+                        props.device_id ? props.device_id : "unknown id",
+-                       props.memory_free/1024/1024);
++                       memory_free/1024/1024);
+     }
+
+     const int status = llama_model_load(path_model, splits, *model, params);
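Note: the second diff above modifies a patch file rather than source directly, which is why its hunks contain diff markers of their own. The same fix is recorded twice: once applied to the vendored tree at llama/llama.cpp/src/llama.cpp, and once appended to the corresponding patch under llama/patches, presumably so the change is reproduced the next time llama.cpp is synced from upstream.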