From 03408f34374977dee2855b511226ff8c507a2401 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@ollama.com>
Date: Wed, 9 Oct 2024 16:55:34 -0700
Subject: [PATCH] server: Don't clear cmd when closing a server

Close can be called on an LLM server if the runner subprocess dies.
However, the Ollama scheduler code may not know about this yet and
may still try to access the server. In this case, it is important
that 'cmd' is still available, as it is used to check on the status
of the subprocess. If this happens, Kill may be called twice on the
subprocess - that is fine.

In addition, model unloading may race with new accesses, so we should
hold the model lock while freeing the model. This may result in the
model being reloaded after the first Close call - this is also fine,
as Close will be called again later.
---
 llm/server.go | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llm/server.go b/llm/server.go
index c0d44254b..a743182c0 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -1086,10 +1086,13 @@ func (s *llmServer) Detokenize(ctx context.Context, tokens []int) (string, error
 }
 
 func (s *llmServer) Close() error {
+	s.modelLock.Lock()
 	if s.model != nil {
 		llama.FreeModel(s.model)
 		s.model = nil
 	}
+	s.modelLock.Unlock()
+
 	if s.cmd != nil {
 		slog.Debug("stopping llama server")
 		if err := s.cmd.Process.Kill(); err != nil {
@@ -1100,7 +1103,6 @@ func (s *llmServer) Close() error {
 			slog.Debug("waiting for llama server to exit")
 			<-s.done
 		}
-		s.cmd = nil
 
 		slog.Debug("llama server stopped")
 	}