From 05d53457af8fda79c0e3884f316144d6c2aed5b9 Mon Sep 17 00:00:00 2001
From: russcoss
Date: Tue, 16 Sep 2025 20:14:21 -0400
Subject: [PATCH] refactor: use the built-in max/min to simplify the code (#12280)

Signed-off-by: russcoss
---
 runner/llamarunner/cache.go                 | 7 +------
 runner/ollamarunner/cache.go                | 7 +------
 server/internal/internal/backoff/backoff.go | 5 +----
 server/sched.go                             | 5 +----
 4 files changed, 4 insertions(+), 20 deletions(-)

diff --git a/runner/llamarunner/cache.go b/runner/llamarunner/cache.go
index 44b246134b..9ed1c2924a 100644
--- a/runner/llamarunner/cache.go
+++ b/runner/llamarunner/cache.go
@@ -204,13 +204,8 @@ func (c *InputCache) ShiftDiscard(inputLen int, numKeep int) int {
 	targetFree = max(targetFree, 1)
 
 	currentFree := c.numCtx - inputLen
-	discard := targetFree - currentFree
 
-	if discard < 0 {
-		discard = 0
-	}
-
-	return discard
+	return max(targetFree-currentFree, 0)
 }
 
 type ErrReprocessInputs struct {
diff --git a/runner/ollamarunner/cache.go b/runner/ollamarunner/cache.go
index f558f7b87a..a3ffc3bd29 100644
--- a/runner/ollamarunner/cache.go
+++ b/runner/ollamarunner/cache.go
@@ -242,13 +242,8 @@ func (c *InputCache) ShiftDiscard(inputLen int32, numKeep int32) int32 {
 	targetFree = max(targetFree, 1)
 
 	currentFree := c.numCtx - inputLen
-	discard := targetFree - currentFree
 
-	if discard < 0 {
-		discard = 0
-	}
-
-	return discard
+	return max(targetFree-currentFree, 0)
 }
 
 type ErrReprocessInputs struct {
diff --git a/server/internal/internal/backoff/backoff.go b/server/internal/internal/backoff/backoff.go
index 1f0634f7c0..08b4ed7f99 100644
--- a/server/internal/internal/backoff/backoff.go
+++ b/server/internal/internal/backoff/backoff.go
@@ -25,10 +25,7 @@ func Loop(ctx context.Context, maxBackoff time.Duration) iter.Seq2[int, error] {
 
 			// n^2 backoff timer is a little smoother than the
 			// common choice of 2^n.
-			d := time.Duration(n*n) * 10 * time.Millisecond
-			if d > maxBackoff {
-				d = maxBackoff
-			}
+			d := min(time.Duration(n*n)*10*time.Millisecond, maxBackoff)
 			// Randomize the delay between 0.5-1.5 x msec, in order
 			// to prevent accidental "thundering herd" problems.
 			d = time.Duration(float64(d) * (rand.Float64() + 0.5))
diff --git a/server/sched.go b/server/sched.go
index c501c0e85d..74aa406af5 100644
--- a/server/sched.go
+++ b/server/sched.go
@@ -382,10 +382,7 @@ func (pending *LlmRequest) useLoadedRunner(runner *runnerRef, finished chan *Llm
 // load creates a new model based on req and loads it. If requireFull is true then the model must be loaded fully onto GPUs
 // (if any). Returns whether the scheduler needs to evict a model to make this one fit.
 func (s *Scheduler) load(req *LlmRequest, f *ggml.GGML, gpus discover.GpuInfoList, requireFull bool) bool {
-	numParallel := int(envconfig.NumParallel())
-	if numParallel < 1 {
-		numParallel = 1
-	}
+	numParallel := max(int(envconfig.NumParallel()), 1)
 
 	// Embedding models should always be loaded with parallel=1
 	if req.model.CheckCapabilities(model.CapabilityCompletion) != nil {