Increase minimum CUDA memory allocation overhead and fix minimum overhead for multi-gpu (#1896)

* increase minimum CUDA overhead and fix minimum overhead for multi-GPU

* fix multi gpu overhead

* limit overhead to 10% of the memory of all GPUs

* better wording

* allocate fixed amount before layers

* fixed amount only includes the graph allocation
This commit is contained in:
Jeffrey Morgan
2024-01-10 19:08:51 -05:00
committed by GitHub
parent f83881390f
commit b24e8d17b2
2 changed files with 19 additions and 12 deletions

View File

@@ -184,10 +184,11 @@ func getCPUMem() (memInfo, error) {
func CheckVRAM() (int64, error) {
gpuInfo := GetGPUInfo()
if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
// leave 10% or 384Mi of VRAM free for unaccounted for overhead
overhead := gpuInfo.FreeMemory * uint64(gpuInfo.DeviceCount) / 10
if overhead < 384*1024*1024 {
overhead = 384 * 1024 * 1024
// leave 10% or 512MiB of VRAM free per GPU to handle unaccounted for overhead
overhead := gpuInfo.FreeMemory / 10
gpus := uint64(gpuInfo.DeviceCount)
if overhead < gpus*512*1024*1024 {
overhead = gpus * 512 * 1024 * 1024
}
return int64(gpuInfo.FreeMemory - overhead), nil
}