mimic logs for layers on new engine (#11278)

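This adds extra log messages summarizing layer offloading (the number of repeating layers, where the output layer is placed, and the total number of layers offloaded to the GPU) to make the new engine's output more consistent with the llama engine's.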
2025-08-26 14:11:39 +02:00 · 2025-07-02 16:38:36 -07:00
parent 5d8c173529
commit 2c4ce40334
1 changed files with 18 additions and 0 deletions
--- a/ml/backend/ggml/ggml.go
+++ b/ml/backend/ggml/ggml.go
@@ -358,6 +358,24 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
 		bbs[c] = b
 	}

+	// Mimic llama runner logs summarizing layers and memory
+	slog.Info(fmt.Sprintf("offloading %d repeating layers to GPU", max(0, params.NumGPULayers-1)))
+	gpuLayers := 0
+	switch C.ggml_backend_dev_type(output.d) {
+	case 0: // CPU
+		slog.Info("offloading output layer to CPU")
+	case 1: // GPU
+		slog.Info("offloading output layer to GPU")
+		gpuLayers++
+	case 2: // ACCEL
+		slog.Info("offloading output layer to ACCEL")
+	}
+	for _, layer := range layers {
+		if C.ggml_backend_dev_type(layer.d) == 1 {
+			gpuLayers++
+		}
+	}
+	slog.Info(fmt.Sprintf("offloaded %d/%d layers to GPU", gpuLayers, len(layers)+1))
 	for bs := range maps.Values(bbs) {
 		slog.Info("model weights", "buffer", C.GoString(C.ggml_backend_buffer_name(bs)), "size", format.HumanBytes2(uint64(C.ggml_backend_buffer_get_size(bs))))
 	}