Mirror of https://github.com/ollama/ollama.git, synced 2025-08-26 14:11:39 +02:00
mimic logs for layers on new engine (#11278)
This adds some extra logs so that the new engine's output is more consistent with the llama engine's.
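For context, the new engine logs through Go's log/slog package, and the diff below mixes two styles: messages preformatted with fmt.Sprintf, so the text matches the llama runner's wording verbatim, and slog's structured key/value form for the final buffer-size line. A minimal, runnable sketch of the two styles (the buffer name and size here are made-up illustrative values, not output from a real model load):

    package main

    import (
        "fmt"
        "log/slog"
    )

    func main() {
        // Flat style: the whole message is preformatted so the text
        // matches the llama runner's log lines character for character.
        slog.Info(fmt.Sprintf("offloaded %d/%d layers to GPU", 33, 33))

        // Structured style: key/value pairs, as used for the
        // "model weights" line in the diff.
        slog.Info("model weights", "buffer", "CUDA0", "size", "4.1 GiB")
    }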
@@ -358,6 +358,24 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
 		bbs[c] = b
 	}
 
+	// Mimic llama runner logs summarizing layers and memory
+	slog.Info(fmt.Sprintf("offloading %d repeating layers to GPU", max(0, params.NumGPULayers-1)))
+	gpuLayers := 0
+	switch C.ggml_backend_dev_type(output.d) {
+	case 0: // CPU
+		slog.Info("offloading output layer to CPU")
+	case 1: // GPU
+		slog.Info("offloading output layer to GPU")
+		gpuLayers++
+	case 2: // ACCEL
+		slog.Info("offloading output layer to ACCEL")
+	}
+	for _, layer := range layers {
+		if C.ggml_backend_dev_type(layer.d) == 1 {
+			gpuLayers++
+		}
+	}
+	slog.Info(fmt.Sprintf("offloaded %d/%d layers to GPU", gpuLayers, len(layers)+1))
 	for bs := range maps.Values(bbs) {
 		slog.Info("model weights", "buffer", C.GoString(C.ggml_backend_buffer_name(bs)), "size", format.HumanBytes2(uint64(C.ggml_backend_buffer_get_size(bs))))
 	}
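Stripped of the cgo calls, the logic the hunk adds can be read as the following self-contained sketch. Here deviceType and its constants are hypothetical stand-ins for the values ggml_backend_dev_type returns (0 = CPU, 1 = GPU, 2 = ACCEL), and summarizeOffload is an invented name for illustration. Requires Go 1.21+ (log/slog and the max builtin):

    package main

    import (
        "fmt"
        "log/slog"
    )

    // deviceType stands in for the C enum returned by ggml_backend_dev_type.
    type deviceType int

    const (
        devCPU   deviceType = 0
        devGPU   deviceType = 1
        devACCEL deviceType = 2
    )

    // summarizeOffload mirrors the hunk: log the requested repeating-layer
    // count, classify the output layer by device, count every layer that
    // landed on a GPU, and report the total (layers plus the output layer).
    func summarizeOffload(layers []deviceType, output deviceType, numGPULayers int) {
        slog.Info(fmt.Sprintf("offloading %d repeating layers to GPU", max(0, numGPULayers-1)))

        gpuLayers := 0
        switch output {
        case devCPU:
            slog.Info("offloading output layer to CPU")
        case devGPU:
            slog.Info("offloading output layer to GPU")
            gpuLayers++
        case devACCEL:
            slog.Info("offloading output layer to ACCEL")
        }

        for _, d := range layers {
            if d == devGPU {
                gpuLayers++
            }
        }
        slog.Info(fmt.Sprintf("offloaded %d/%d layers to GPU", gpuLayers, len(layers)+1))
    }

    func main() {
        // Three repeating layers, two on GPU; output layer also on GPU.
        summarizeOffload([]deviceType{devGPU, devGPU, devCPU}, devGPU, 3)
    }

Note that len(layers)+1 in the final message counts the output layer alongside the repeating layers, which is why gpuLayers is seeded by the switch before the loop runs.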