mirror of
https://github.com/ollama/ollama.git
synced 2025-11-10 22:07:45 +01:00
DRY out the runner lifecycle code (#12540)
* DRY out the runner lifecycle code. Now that discovery uses the runners as well, this unifies the runner spawning code into a single place. This also unifies GPU discovery types with the newer ml.DeviceInfo.
* win: make incremental builds better. Place build artifacts in discrete directories so incremental builds don't have to start fresh.
* Adjust sort order to consider iGPUs.
* Handle CPU inference OOM scenarios.
* Review comments.
This commit is contained in:
@@ -10,7 +10,7 @@ import (
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/discover"
|
||||
"github.com/ollama/ollama/format"
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
"github.com/ollama/ollama/ml"
|
||||
)
|
||||
@@ -54,13 +54,7 @@ func TestEstimateGPULayers(t *testing.T) {
|
||||
}
|
||||
|
||||
// Simple CPU scenario
|
||||
gpus := []discover.GpuInfo{
|
||||
{
|
||||
DeviceID: ml.DeviceID{
|
||||
Library: "cpu",
|
||||
},
|
||||
},
|
||||
}
|
||||
gpus := []ml.DeviceInfo{}
|
||||
projectors := []string{}
|
||||
opts := api.DefaultOptions()
|
||||
t.Run("cpu", func(t *testing.T) {
|
||||
@@ -77,19 +71,17 @@ func TestEstimateGPULayers(t *testing.T) {
|
||||
memoryLayerOutput := uint64(4)
|
||||
|
||||
// Dual CUDA scenario with asymmetry
|
||||
gpuMinimumMemory := uint64(2048)
|
||||
gpus = []discover.GpuInfo{
|
||||
gpuMinimumMemory := uint64(457 * format.MebiByte)
|
||||
gpus = []ml.DeviceInfo{
|
||||
{
|
||||
DeviceID: ml.DeviceID{
|
||||
Library: "cuda",
|
||||
Library: "CUDA",
|
||||
},
|
||||
MinimumMemory: gpuMinimumMemory,
|
||||
},
|
||||
{
|
||||
DeviceID: ml.DeviceID{
|
||||
Library: "cuda",
|
||||
Library: "CUDA",
|
||||
},
|
||||
MinimumMemory: gpuMinimumMemory,
|
||||
},
|
||||
}
|
||||
// Nested array: GPU0 layer space, GPU1 layer space, expected gpu0, expected gpu1
|
||||
|
||||
Reference in New Issue
Block a user