diff --git a/ml/backend/ggml/ggml.go b/ml/backend/ggml/ggml.go index 43104092ae..f8727490d7 100644 --- a/ml/backend/ggml/ggml.go +++ b/ml/backend/ggml/ggml.go @@ -358,6 +358,24 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) { bbs[c] = b } + // Mimic llama runner logs summarizing layers and memory + slog.Info(fmt.Sprintf("offloading %d repeating layers to GPU", max(0, params.NumGPULayers-1))) + gpuLayers := 0 + switch C.ggml_backend_dev_type(output.d) { + case 0: // CPU + slog.Info("offloading output layer to CPU") + case 1: // GPU + slog.Info("offloading output layer to GPU") + gpuLayers++ + case 2: // ACCEL + slog.Info("offloading output layer to ACCEL") + } + for _, layer := range layers { + if C.ggml_backend_dev_type(layer.d) == 1 { + gpuLayers++ + } + } + slog.Info(fmt.Sprintf("offloaded %d/%d layers to GPU", gpuLayers, len(layers)+1)) for bs := range maps.Values(bbs) { slog.Info("model weights", "buffer", C.GoString(C.ggml_backend_buffer_name(bs)), "size", format.HumanBytes2(uint64(C.ggml_backend_buffer_get_size(bs)))) }