llamarunner: Init GGML before printing system info

We currently print system info before the GGML backends are loaded, so the output only describes the default, lowest-common-denominator runner. If we move the GGML initialization ahead of the print, we can see what we are actually running.

Before:
time=2025-02-14T11:15:07.606-08:00 level=INFO source=runner.go:935 msg=system info="CPU : LLAMAFILE = 1 | CPU : LLAMAFILE = 1 | cgo(gcc)" threads=24

After:
time=2025-02-14T11:16:02.936-08:00 level=INFO source=runner.go:935 msg=system info="CPU : LLAMAFILE = 1 | CPU : LLAMAFILE = 1 | CUDA : ARCHS = 890 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | LLAMAFILE = 1 | cgo(gcc)" threads=24
2025-05-31 16:19:17 +02:00 · 2025-02-14 11:13:34 -08:00 · 2025-02-14 11:13:34 -08:00 · 010313bb63
commit 010313bb63
parent 5296f487a8
1 changed file with 2 additions and 2 deletions
--- a/runner/llamarunner/runner.go
+++ b/runner/llamarunner/runner.go
@@ -845,8 +845,6 @@ func (s *Server) loadModel(
 	threads int,
 	multiUserCache bool,
 ) {
-	llama.BackendInit()
-
 	var err error
 	s.model, err = llama.LoadModelFromFile(mpath, params)
 	if err != nil {
@@ -932,6 +930,8 @@ func Execute(args []string) error {
 	})
 	slog.SetDefault(slog.New(handler))
 	slog.Info("starting go runner")
+
+	llama.BackendInit()
 	slog.Info("system", "info", llama.PrintSystemInfo(), "threads", *threads)

 	server := &Server{