Mirror of https://github.com/ollama/ollama.git (synced 2025-11-11 15:36:58 +01:00)
Bump VRAM buffer back up
Under stress scenarios we're seeing OOMs, so this should help stabilize allocations under heavy concurrency.
@@ -31,8 +31,8 @@ type handles struct {
 }
 
 const (
-	cudaMinimumMemory = 256 * format.MebiByte
-	rocmMinimumMemory = 256 * format.MebiByte
+	cudaMinimumMemory = 457 * format.MebiByte
+	rocmMinimumMemory = 457 * format.MebiByte
 )
 
 var gpuMutex sync.Mutex

@@ -15,7 +15,7 @@ import (
 )
 
 const (
-	metalMinimumMemory = 384 * format.MebiByte
+	metalMinimumMemory = 512 * format.MebiByte
 )
 
 func GetGPUInfo() GpuInfoList {
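For context, below is a minimal Go sketch of how a per-backend minimum like the constants above might be applied when budgeting VRAM. The helper usableVRAM and the surrounding scaffolding are illustrative assumptions, not ollama's actual API; only the constant values (457 MiB for CUDA/ROCm, 512 MiB for Metal) come from the diff.

package main

import "fmt"

// In the ollama codebase, format.MebiByte is 1024 * 1024 bytes.
const mebiByte = 1024 * 1024

// Per-backend reserves mirroring the constants changed in the diff.
const (
	cudaMinimumMemory  = 457 * mebiByte
	rocmMinimumMemory  = 457 * mebiByte
	metalMinimumMemory = 512 * mebiByte
)

// usableVRAM is a hypothetical helper: it subtracts the backend's reserve
// from the free VRAM a GPU reports, so allocation planning leaves headroom
// for driver/runtime overhead under heavy concurrent load.
func usableVRAM(freeBytes, minimum uint64) uint64 {
	if freeBytes <= minimum {
		return 0
	}
	return freeBytes - minimum
}

func main() {
	free := uint64(8 * 1024 * mebiByte) // e.g. an 8 GiB card reporting all memory free
	fmt.Printf("usable CUDA VRAM: %d MiB\n", usableVRAM(free, cudaMinimumMemory)/mebiByte)
}

A larger reserve means the scheduler assumes slightly less usable VRAM per GPU, which trades a little offload capacity for fewer out-of-memory failures when many requests allocate concurrently.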