DRY out the runner lifecycle code (#12540)

* DRY out the runner lifecycle code

Now that discovery uses the runners as well, this unifies the runner-spawning code
into a single place. It also unifies the GPU discovery types with the newer
ml.DeviceInfo type (sketched below, after the change list).

* win: make incremental builds better

Place build artifacts in discrete directories so incremental builds don't have to start fresh.

* Adjust sort order to consider iGPUs (see the sort-order sketch after the change list)

* Handle CPU inference OOM scenarios (see the memory-check sketch after the change list)

* Address review comments
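
The "unified" type mentioned in the first bullet is ml.DeviceInfo, keyed by
ml.DeviceID. The diff below only exercises Library and MinimumMemory; the
sketch here fills in one plausible shape and is an assumption, not the exact
ml package API:

package ml

// DeviceID identifies a device by backend library plus a backend-specific
// identifier. Library appears in the diff below; ID is an assumption.
type DeviceID struct {
    Library string // e.g. "CUDA", "ROCm", "Metal"
    ID      string
}

// DeviceInfo describes one discovered compute device. With discovery now
// running through the runners, scheduling and discovery share this type.
// All fields other than MinimumMemory are illustrative assumptions.
type DeviceInfo struct {
    DeviceID
    TotalMemory   uint64 // total device memory, bytes
    FreeMemory    uint64 // currently free device memory, bytes
    MinimumMemory uint64 // memory the backend reserves for itself, bytes
    Integrated    bool   // true for iGPUs that share system memory
}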
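
For the iGPU sort-order bullet, a minimal runnable sketch of one reasonable
policy, assuming the Integrated flag from the sketch above; the committed
comparator may differ:

package main

import (
    "fmt"
    "sort"
)

type device struct {
    Name       string
    Integrated bool
    FreeMemory uint64
}

func main() {
    devices := []device{
        {"igpu", true, 32 << 30}, // iGPU with a large shared pool
        {"dgpu0", false, 8 << 30},
        {"dgpu1", false, 24 << 30},
    }
    // Rank discrete GPUs ahead of iGPUs so a big shared-memory iGPU does
    // not outrank a faster discrete card; within each class, prefer the
    // device with more free memory.
    sort.SliceStable(devices, func(i, j int) bool {
        if devices[i].Integrated != devices[j].Integrated {
            return !devices[i].Integrated
        }
        return devices[i].FreeMemory > devices[j].FreeMemory
    })
    fmt.Println(devices) // dgpu1, dgpu0, igpu
}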
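
For the CPU OOM bullet, a hedged sketch of the kind of guard implied: check
the estimated requirement against free system memory before a CPU-only load,
so the server returns a clear error instead of the runner being OOM-killed
mid-load. fitsInSystemMemory is a hypothetical name, not the committed code;
format.HumanBytes2 is the ollama format helper for human-readable sizes.

package llm

import (
    "fmt"

    "github.com/ollama/ollama/format"
)

// fitsInSystemMemory is illustrative: when zero layers can be offloaded to
// a GPU, the whole estimate must fit in free system memory or the load is
// rejected up front.
func fitsInSystemMemory(required, freeSystem uint64) error {
    if required > freeSystem {
        return fmt.Errorf("model requires %s of system memory but only %s is free",
            format.HumanBytes2(required), format.HumanBytes2(freeSystem))
    }
    return nil
}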

Author: Daniel Hiltgen
Date:   2025-10-23 11:20:02 -07:00
Commit: 3258a89b6e (parent 1c093e97af)
16 changed files with 720 additions and 924 deletions


@@ -10,7 +10,7 @@ import (
 	"github.com/stretchr/testify/require"
 	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/discover"
+	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/fs/ggml"
 	"github.com/ollama/ollama/ml"
 )
@@ -54,13 +54,7 @@ func TestEstimateGPULayers(t *testing.T) {
 	}
 	// Simple CPU scenario
-	gpus := []discover.GpuInfo{
-		{
-			DeviceID: ml.DeviceID{
-				Library: "cpu",
-			},
-		},
-	}
+	gpus := []ml.DeviceInfo{}
 	projectors := []string{}
 	opts := api.DefaultOptions()
 	t.Run("cpu", func(t *testing.T) {
@@ -77,19 +71,17 @@ func TestEstimateGPULayers(t *testing.T) {
 	memoryLayerOutput := uint64(4)
 	// Dual CUDA scenario with asymmetry
-	gpuMinimumMemory := uint64(2048)
-	gpus = []discover.GpuInfo{
+	gpuMinimumMemory := uint64(457 * format.MebiByte)
+	gpus = []ml.DeviceInfo{
 		{
 			DeviceID: ml.DeviceID{
-				Library: "cuda",
+				Library: "CUDA",
 			},
 			MinimumMemory: gpuMinimumMemory,
 		},
 		{
 			DeviceID: ml.DeviceID{
-				Library: "cuda",
+				Library: "CUDA",
 			},
 			MinimumMemory: gpuMinimumMemory,
 		},
 	}
 	// Nested array: GPU0 layer space, GPU1 layer space, expected gpu0, expected gpu1
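
The trailing comment describes the table driving the dual-GPU assertions.
A hypothetical reconstruction of its shape (values invented for
illustration; the committed data differs):

package llm

// dualGPUCases pairs the layer capacity free on each GPU with the layer
// counts the estimator is expected to assign to each.
func dualGPUCases() []struct{ layer0, layer1, expect0, expect1 uint64 } {
    return []struct{ layer0, layer1, expect0, expect1 uint64 }{
        {1, 1, 1, 1}, // symmetric space: one layer each
        {2, 1, 2, 1}, // GPU0 has more room and takes the extra layer
    }
}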