diff --git a/llm/server.go b/llm/server.go index e7c1029e83..079ecb2575 100644 --- a/llm/server.go +++ b/llm/server.go @@ -1486,7 +1486,10 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu serverReq.Header.Set("Content-Type", "application/json") res, err := http.DefaultClient.Do(serverReq) - if err != nil { + if err != nil && errors.Is(err, context.Canceled) { + // client closed connection + return err + } else if err != nil { slog.Error("post predict", "error", err) return errors.New("model runner has unexpectedly stopped, this may be due to resource limitations or an internal error, check ollama server logs for details") } diff --git a/server/sched.go b/server/sched.go index 5f8c61e099..1996ebfd85 100644 --- a/server/sched.go +++ b/server/sched.go @@ -229,8 +229,9 @@ func (s *Scheduler) processPending(ctx context.Context) { } if runnerToExpire == nil { - // Shouildn't happen - slog.Error("runner to expire was nil!") + // While we were performing load calculations, the loaded runner(s) unloaded in parallel + // so findRunnerToUnload returned no runners. We'll try again and the loadedCount should be zero + slog.Debug("runner to expire was nil, retrying") continue } // Trigger an expiration to unload once it's done