partial offloading

This commit is contained in:
Michael Yang
2024-04-05 14:50:38 -07:00
parent 8b2c10061c
commit 7e33a017c0
6 changed files with 98 additions and 86 deletions

View File

@@ -243,7 +243,7 @@ func getCPUMem() (memInfo, error) {
return ret, nil
}
func CheckVRAM() (int64, error) {
func CheckVRAM() (uint64, error) {
userLimit := os.Getenv("OLLAMA_MAX_VRAM")
if userLimit != "" {
avail, err := strconv.ParseInt(userLimit, 10, 64)
@@ -251,11 +251,11 @@ func CheckVRAM() (int64, error) {
return 0, fmt.Errorf("Invalid OLLAMA_MAX_VRAM setting %s: %s", userLimit, err)
}
slog.Info(fmt.Sprintf("user override OLLAMA_MAX_VRAM=%d", avail))
return avail, nil
return uint64(avail), nil
}
gpuInfo := GetGPUInfo()
if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
return int64(gpuInfo.FreeMemory), nil
return gpuInfo.FreeMemory, nil
}
return 0, fmt.Errorf("no GPU detected") // TODO - better handling of CPU based memory determiniation

View File

@@ -17,7 +17,7 @@ import (
)
// CheckVRAM returns the free VRAM in bytes on Linux machines with NVIDIA GPUs
func CheckVRAM() (int64, error) {
func CheckVRAM() (uint64, error) {
userLimit := os.Getenv("OLLAMA_MAX_VRAM")
if userLimit != "" {
avail, err := strconv.ParseInt(userLimit, 10, 64)
@@ -25,15 +25,14 @@ func CheckVRAM() (int64, error) {
return 0, fmt.Errorf("Invalid OLLAMA_MAX_VRAM setting %s: %s", userLimit, err)
}
slog.Info(fmt.Sprintf("user override OLLAMA_MAX_VRAM=%d", avail))
return avail, nil
return uint64(avail), nil
}
if runtime.GOARCH == "amd64" {
// gpu not supported, this may not be metal
return 0, nil
}
recommendedMaxVRAM := int64(C.getRecommendedMaxVRAM())
return recommendedMaxVRAM, nil
return uint64(C.getRecommendedMaxVRAM()), nil
}
func GetGPUInfo() GpuInfo {

View File

@@ -15,7 +15,7 @@ type GpuInfo struct {
Variant string `json:"variant,omitempty"`
// MinimumMemory represents the minimum memory required to use the GPU
MinimumMemory int64 `json:"-"`
MinimumMemory uint64 `json:"-"`
// TODO add other useful attributes about the card here for discovery information
}