Switch windows build to fully dynamic

Refactor where we store build outputs, and support a fully dynamic loading
model on Windows so the base executable has no special dependencies and
therefore doesn't require a special PATH.
Daniel Hiltgen
2023-12-23 11:35:44 -08:00
parent 9a70aecccb
commit d966b730ac
17 changed files with 379 additions and 228 deletions
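The diff below drops the separate Driver field so detection results are keyed off a single Library value, and the commit message above describes moving Windows to a fully dynamic loading model. As a rough, hypothetical sketch of that idea (not code from this commit; the dynload package, loadBackend helper, directory layout, and ext_server.dll name are all assumptions for illustration), a backend DLL can be loaded from an explicit directory at runtime, so the base executable has no special link-time imports and nothing extra has to be on PATH:

package dynload

import (
	"fmt"
	"path/filepath"

	"golang.org/x/sys/windows"
)

// loadBackend resolves a backend DLL by the detected library name
// ("cuda", "rocm", "cpu") under an explicit base directory, bypassing
// the usual PATH-based DLL search entirely.
func loadBackend(baseDir, library string) (*windows.DLL, error) {
	dllPath := filepath.Join(baseDir, library, "ext_server.dll") // hypothetical layout
	dll, err := windows.LoadDLL(dllPath)
	if err != nil {
		return nil, fmt.Errorf("unable to load %s: %w", dllPath, err)
	}
	return dll, nil
}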

View File

@@ -13,6 +13,7 @@ import "C"
 import (
 	"fmt"
 	"log"
+	"runtime"
 	"sync"
 	"unsafe"
@@ -65,15 +66,14 @@ func GetGPUInfo() GpuInfo {
 	}
 	var memInfo C.mem_info_t
-	resp := GpuInfo{"", "", 0, 0}
+	resp := GpuInfo{"", 0, 0}
 	if gpuHandles.cuda != nil {
 		C.cuda_check_vram(*gpuHandles.cuda, &memInfo)
 		if memInfo.err != nil {
 			log.Printf("error looking up CUDA GPU memory: %s", C.GoString(memInfo.err))
 			C.free(unsafe.Pointer(memInfo.err))
 		} else {
-			resp.Driver = "CUDA"
-			resp.Library = "cuda_server"
+			resp.Library = "cuda"
 		}
 	} else if gpuHandles.rocm != nil {
 		C.rocm_check_vram(*gpuHandles.rocm, &memInfo)
@@ -81,15 +81,17 @@ func GetGPUInfo() GpuInfo {
log.Printf("error looking up ROCm GPU memory: %s", C.GoString(memInfo.err))
C.free(unsafe.Pointer(memInfo.err))
} else {
resp.Driver = "ROCM"
resp.Library = "rocm_server"
resp.Library = "rocm"
}
}
if resp.Driver == "" {
if resp.Library == "" {
C.cpu_check_ram(&memInfo)
resp.Driver = "CPU"
// In the future we may offer multiple CPU variants to tune CPU features
resp.Library = "default"
if runtime.GOOS == "windows" {
resp.Library = "cpu"
} else {
resp.Library = "default"
}
}
if memInfo.err != nil {
log.Printf("error looking up CPU memory: %s", C.GoString(memInfo.err))
@@ -103,7 +105,7 @@ func GetGPUInfo() GpuInfo {
 func CheckVRAM() (int64, error) {
 	gpuInfo := GetGPUInfo()
-	if gpuInfo.FreeMemory > 0 && gpuInfo.Driver != "CPU" {
+	if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
 		return int64(gpuInfo.FreeMemory), nil
 	}
 	return 0, fmt.Errorf("no GPU detected") // TODO - better handling of CPU based memory determiniation
@@ -114,7 +116,7 @@ func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
 		return opts.NumGPU
 	}
 	info := GetGPUInfo()
-	if info.Driver == "CPU" {
+	if info.Library == "cpu" || info.Library == "default" {
 		return 0
 	}
@@ -128,7 +130,7 @@ func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
 	// 75% of the absolute max number of layers we can fit in available VRAM, off-loading too many layers to the GPU can cause OOM errors
 	layers := int(info.FreeMemory/bytesPerLayer) * 3 / 4
-	log.Printf("%d MB VRAM available, loading up to %d %s GPU layers out of %d", info.FreeMemory/(1024*1024), layers, info.Driver, numLayer)
+	log.Printf("%d MB VRAM available, loading up to %d %s GPU layers out of %d", info.FreeMemory/(1024*1024), layers, info.Library, numLayer)
 	return layers
 }
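As a worked example of the calculation above (illustrative numbers, not from the commit): with 6 GiB of free VRAM, reported as 6442450944/(1024*1024) = 6144 MB, and a bytesPerLayer of 160 MiB, int(6442450944/167772160) = 38 layers would fit in total, and 38 * 3 / 4 = 28 layers are actually offloaded; the 25% held back is the headroom against the OOM errors mentioned in the comment.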

View File

@@ -20,7 +20,6 @@ func GetGPUInfo() GpuInfo {
 	// TODO - Metal vs. x86 macs...
 	return GpuInfo{
-		Driver:      "METAL",
 		Library:     "default",
 		TotalMemory: 0,
 		FreeMemory:  0,

View File

@@ -9,7 +9,7 @@ import (
 func TestBasicGetGPUInfo(t *testing.T) {
 	info := GetGPUInfo()
-	assert.Contains(t, "CUDA ROCM CPU METAL", info.Driver)
+	assert.Contains(t, "cuda rocm cpu default", info.Library)
 	switch runtime.GOOS {
 	case "darwin":

View File

@@ -2,7 +2,6 @@ package gpu
 // Beginning of an `ollama info` command
 type GpuInfo struct {
-	Driver      string `json:"driver,omitempty"`
 	Library     string `json:"library,omitempty"`
 	TotalMemory uint64 `json:"total_memory,omitempty"`
 	FreeMemory  uint64 `json:"free_memory,omitempty"`
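With Driver gone, callers key everything off Library, whose values after this change are "cuda", "rocm", "cpu", and "default" (as the updated test asserts). A minimal sketch, assuming a hypothetical runnerDir helper and a runners-style directory layout that are not part of this commit, of how that value could select the directory holding the matching dynamically loaded server build:

package gpu

import "path/filepath"

// runnerDir maps the detected library to the subdirectory holding its
// dynamically loaded server build. The layout is illustrative only.
func runnerDir(baseDir string, info GpuInfo) string {
	switch info.Library {
	case "cuda", "rocm", "cpu":
		return filepath.Join(baseDir, info.Library)
	default:
		// "default" or anything unrecognized falls back to the portable CPU build.
		return filepath.Join(baseDir, "default")
	}
}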