Mirror of https://github.com/ollama/ollama.git
Detect very old CUDA GPUs and fall back to CPU
If we try to load the CUDA library on an old GPU, it panics and crashes the server. This checks the compute capability before we load the library so we can gracefully fall back to CPU mode.
gpu/gpu.go (16 changed lines)
@@ -28,6 +28,9 @@ type handles struct {
 var gpuMutex sync.Mutex
 var gpuHandles *handles = nil
 
+// TODO verify this is the correct min version
+const CudaComputeMajorMin = 5
+
 // Note: gpuMutex must already be held
 func initGPUHandles() {
 	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
@@ -73,7 +76,18 @@ func GetGPUInfo() GpuInfo {
 			log.Printf("error looking up CUDA GPU memory: %s", C.GoString(memInfo.err))
 			C.free(unsafe.Pointer(memInfo.err))
 		} else {
-			resp.Library = "cuda"
+			// Verify minimum compute capability
+			var cc C.cuda_compute_capability_t
+			C.cuda_compute_capability(*gpuHandles.cuda, &cc)
+			if cc.err != nil {
+				log.Printf("error looking up CUDA GPU compute capability: %s", C.GoString(cc.err))
+				C.free(unsafe.Pointer(cc.err))
+			} else if cc.major >= CudaComputeMajorMin {
+				log.Printf("CUDA Compute Capability detected: %d.%d", cc.major, cc.minor)
+				resp.Library = "cuda"
+			} else {
+				log.Printf("CUDA GPU is too old. Falling back to CPU mode. Compute Capability detected: %d.%d", cc.major, cc.minor)
+			}
 		}
 	} else if gpuHandles.rocm != nil {
 		C.rocm_check_vram(*gpuHandles.rocm, &memInfo)
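
For readers skimming the diff, the selection logic it adds can be summarized as a tiny standalone Go sketch. The type computeCapability, the helper pickLibrary, and the literal "cpu" return value below are illustrative stand-ins: in the real code the major/minor values come from a C struct filled in by C.cuda_compute_capability via cgo, and falling back to CPU simply means resp.Library is left unset.

package main

import "log"

// cudaComputeMajorMin mirrors CudaComputeMajorMin from gpu/gpu.go; the diff's
// TODO notes that this minimum is still unverified.
const cudaComputeMajorMin = 5

// computeCapability is a hypothetical stand-in for C.cuda_compute_capability_t.
type computeCapability struct {
	major, minor int
}

// pickLibrary shows the gate the commit introduces: GPUs at or above the
// minimum major version select the CUDA backend, everything else stays on CPU.
func pickLibrary(cc computeCapability) string {
	if cc.major >= cudaComputeMajorMin {
		log.Printf("CUDA Compute Capability detected: %d.%d", cc.major, cc.minor)
		return "cuda"
	}
	log.Printf("CUDA GPU is too old. Falling back to CPU mode. Compute Capability detected: %d.%d", cc.major, cc.minor)
	return "cpu"
}

func main() {
	log.Println("selected:", pickLibrary(computeCapability{major: 3, minor: 7})) // e.g. a Kepler-era card -> CPU fallback
	log.Println("selected:", pickLibrary(computeCapability{major: 8, minor: 6})) // e.g. a recent card -> "cuda"
}

The key point, per the commit message, is that this check runs before the CUDA inference library is loaded, so GPUs below compute capability 5.x (roughly Maxwell and newer pass) never trigger the load that used to panic and crash the server.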