Increase minimum CUDA memory allocation overhead and fix minimum overhead for multi-gpu (#1896)
* increase minimum cuda overhead and fix minimum overhead for multi-gpu
* fix multi gpu overhead
* limit overhead to 10% of all gpus
* better wording
* allocate fixed amount before layers
* fixed only includes graph alloc
```diff
@@ -184,10 +184,11 @@ func getCPUMem() (memInfo, error) {
 func CheckVRAM() (int64, error) {
 	gpuInfo := GetGPUInfo()
 	if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
-		// leave 10% or 384Mi of VRAM free for unaccounted for overhead
-		overhead := gpuInfo.FreeMemory * uint64(gpuInfo.DeviceCount) / 10
-		if overhead < 384*1024*1024 {
-			overhead = 384 * 1024 * 1024
+		// leave 10% or 512MiB of VRAM free per GPU to handle unaccounted for overhead
+		overhead := gpuInfo.FreeMemory / 10
+		gpus := uint64(gpuInfo.DeviceCount)
+		if overhead < gpus*512*1024*1024 {
+			overhead = gpus * 512 * 1024 * 1024
 		}
 		return int64(gpuInfo.FreeMemory - overhead), nil
 	}
```
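Below is a minimal standalone sketch of the new reservation formula, for illustration only; `availableVRAM`, `freeMemory`, and `deviceCount` are hypothetical stand-ins for `CheckVRAM`, `gpuInfo.FreeMemory`, and `gpuInfo.DeviceCount`.

```go
package main

import "fmt"

// availableVRAM sketches the reservation logic introduced by this commit:
// keep the larger of 10% of total free VRAM or 512 MiB per GPU as headroom
// for unaccounted overhead, and report the rest as usable.
// freeMemory and deviceCount are hypothetical stand-ins for
// gpuInfo.FreeMemory and gpuInfo.DeviceCount.
func availableVRAM(freeMemory, deviceCount uint64) int64 {
	overhead := freeMemory / 10
	if floor := deviceCount * 512 * 1024 * 1024; overhead < floor {
		overhead = floor
	}
	return int64(freeMemory - overhead)
}

func main() {
	const GiB = 1024 * 1024 * 1024

	// Single GPU with 8 GiB free: the 10% share (~819 MiB) exceeds the
	// 512 MiB floor, so roughly 7.2 GiB is reported as usable.
	fmt.Println(availableVRAM(8*GiB, 1))

	// Two GPUs with 4 GiB free in total: the 1 GiB floor (2 x 512 MiB)
	// wins over the 10% share, leaving 3 GiB usable.
	fmt.Println(availableVRAM(4*GiB, 2))
}
```

The per-GPU floor is the multi-GPU fix: the old code scaled the 10% share by `DeviceCount` (40% of free VRAM on a 4-GPU machine) while its 384 MiB minimum stayed flat regardless of device count; the new code reserves a flat 10% overall but grows the 512 MiB minimum with each GPU.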