DRY out the runner lifecycle code (#12540)

* DRY out the runner lifecycle code

Now that discovery uses the runners as well, this unifies the runner-spawning code
into a single place. It also unifies the GPU discovery types with the newer
ml.DeviceInfo type (sketched below, after the change list).

* win: make incremental builds better

Place build artifacts in discrete directories so incremental builds don't have to start fresh.

* Adjust sort order to consider iGPUs (see the sort-order sketch after the change list)

* Handle CPU inference OOM scenarios (see the memory-check sketch after the change list)

* Address review comments
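
The "unified" type mentioned in the first bullet is ml.DeviceInfo, keyed by
ml.DeviceID. The diff below only exercises Library and MinimumMemory; the
sketch here fills in one plausible shape and is an assumption, not the exact
ml package API:

package ml

// DeviceID identifies a device by backend library plus a backend-specific
// identifier. Library appears in the diff below; ID is an assumption.
type DeviceID struct {
    Library string // e.g. "CUDA", "ROCm", "Metal"
    ID      string
}

// DeviceInfo describes one discovered compute device. With discovery now
// running through the runners, scheduling and discovery share this type.
// All fields other than MinimumMemory are illustrative assumptions.
type DeviceInfo struct {
    DeviceID
    TotalMemory   uint64 // total device memory, bytes
    FreeMemory    uint64 // currently free device memory, bytes
    MinimumMemory uint64 // memory the backend reserves for itself, bytes
    Integrated    bool   // true for iGPUs that share system memory
}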
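
For the iGPU sort-order bullet, a minimal runnable sketch of one reasonable
policy, assuming the Integrated flag from the sketch above; the committed
comparator may differ:

package main

import (
    "fmt"
    "sort"
)

type device struct {
    Name       string
    Integrated bool
    FreeMemory uint64
}

func main() {
    devices := []device{
        {"igpu", true, 32 << 30}, // iGPU with a large shared pool
        {"dgpu0", false, 8 << 30},
        {"dgpu1", false, 24 << 30},
    }
    // Rank discrete GPUs ahead of iGPUs so a big shared-memory iGPU does
    // not outrank a faster discrete card; within each class, prefer the
    // device with more free memory.
    sort.SliceStable(devices, func(i, j int) bool {
        if devices[i].Integrated != devices[j].Integrated {
            return !devices[i].Integrated
        }
        return devices[i].FreeMemory > devices[j].FreeMemory
    })
    fmt.Println(devices) // dgpu1, dgpu0, igpu
}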
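
For the CPU OOM bullet, a hedged sketch of the kind of guard implied: check
the estimated requirement against free system memory before a CPU-only load,
so the server returns a clear error instead of the runner being OOM-killed
mid-load. fitsInSystemMemory is a hypothetical name, not the committed code;
format.HumanBytes2 is the ollama format helper for human-readable sizes.

package llm

import (
    "fmt"

    "github.com/ollama/ollama/format"
)

// fitsInSystemMemory is illustrative: when zero layers can be offloaded to
// a GPU, the whole estimate must fit in free system memory or the load is
// rejected up front.
func fitsInSystemMemory(required, freeSystem uint64) error {
    if required > freeSystem {
        return fmt.Errorf("model requires %s of system memory but only %s is free",
            format.HumanBytes2(required), format.HumanBytes2(freeSystem))
    }
    return nil
}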

Author: Daniel Hiltgen
Date:   2025-10-23 11:20:02 -07:00
Commit: 3258a89b6e (parent 1c093e97af)
16 changed files with 720 additions and 924 deletions


@@ -10,7 +10,7 @@ import (
 	"github.com/stretchr/testify/require"
 	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/discover"
+	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/fs/ggml"
 	"github.com/ollama/ollama/ml"
 )
@@ -54,13 +54,7 @@ func TestEstimateGPULayers(t *testing.T) {
 	}
 	// Simple CPU scenario
-	gpus := []discover.GpuInfo{
-		{
-			DeviceID: ml.DeviceID{
-				Library: "cpu",
-			},
-		},
-	}
+	gpus := []ml.DeviceInfo{}
 	projectors := []string{}
 	opts := api.DefaultOptions()
 	t.Run("cpu", func(t *testing.T) {
@@ -77,19 +71,17 @@ func TestEstimateGPULayers(t *testing.T) {
 	memoryLayerOutput := uint64(4)
 	// Dual CUDA scenario with asymmetry
-	gpuMinimumMemory := uint64(2048)
-	gpus = []discover.GpuInfo{
+	gpuMinimumMemory := uint64(457 * format.MebiByte)
+	gpus = []ml.DeviceInfo{
 		{
 			DeviceID: ml.DeviceID{
-				Library: "cuda",
+				Library: "CUDA",
 			},
 			MinimumMemory: gpuMinimumMemory,
 		},
 		{
 			DeviceID: ml.DeviceID{
-				Library: "cuda",
+				Library: "CUDA",
 			},
 			MinimumMemory: gpuMinimumMemory,
 		},
 	}
 	// Nested array: GPU0 layer space, GPU1 layer space, expected gpu0, expected gpu1
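
The trailing comment describes the table driving the dual-GPU assertions.
A hypothetical reconstruction of its shape (values invented for
illustration; the committed data differs):

package llm

// dualGPUCases pairs the layer capacity free on each GPU with the layer
// counts the estimator is expected to assign to each.
func dualGPUCases() []struct{ layer0, layer1, expect0, expect1 uint64 } {
    return []struct{ layer0, layer1, expect0, expect1 uint64 }{
        {1, 1, 1, 1}, // symmetric space: one layer each
        {2, 1, 2, 1}, // GPU0 has more room and takes the extra layer
    }
}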