From cff3f44f4a4097de864d70d9a95f31c62e8ecdfa Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen <daniel@ollama.com>
Date: Mon, 1 Jul 2024 09:43:59 -0700
Subject: [PATCH] Fix case for NumCtx: rename origNumCTX to origNumCtx

---
 server/sched.go | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/server/sched.go b/server/sched.go
index 87da1db47..71b535ae2 100644
--- a/server/sched.go
+++ b/server/sched.go
@@ -23,7 +23,7 @@ type LlmRequest struct {
 	ctx             context.Context //nolint:containedctx
 	model           *Model
 	opts            api.Options
-	origNumCTX      int // Track the initial ctx request
+	origNumCtx      int // Track the initial ctx request
 	sessionDuration time.Duration
 	successCh       chan *runnerRef
 	errCh           chan error
@@ -118,8 +118,8 @@ func (s *Scheduler) processPending(ctx context.Context) {
 		case pending := <-s.pendingReqCh:
 			// Block other requests until we get this pending request running
 			pending.schedAttempts++
-			if pending.origNumCTX == 0 {
-				pending.origNumCTX = pending.opts.NumCtx
+			if pending.origNumCtx == 0 {
+				pending.origNumCtx = pending.opts.NumCtx
 			}
 
 			if pending.ctx.Err() != nil {
@@ -135,7 +135,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
 			}
 			// Keep NumCtx and numParallel in sync
 			if numParallel > 1 {
-				pending.opts.NumCtx = pending.origNumCTX * numParallel
+				pending.opts.NumCtx = pending.origNumCtx * numParallel
 			}
 
 			for {
@@ -197,7 +197,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
 						// simplifying assumption of defaultParallel when in CPU mode
 						if numParallel <= 0 {
 							numParallel = defaultParallel
-							pending.opts.NumCtx = pending.origNumCTX * numParallel
+							pending.opts.NumCtx = pending.origNumCtx * numParallel
 						}
 
 						if loadedCount == 0 {
@@ -691,7 +691,7 @@ func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList, numP
 
 		// First attempt to fit the model into a single GPU
 		for _, p := range numParallelToTry {
-			req.opts.NumCtx = req.origNumCTX * p
+			req.opts.NumCtx = req.origNumCtx * p
 			if !envconfig.SchedSpread {
 				for _, g := range sgl {
 					if ok, estimatedVRAM = llm.PredictServerFit([]gpu.GpuInfo{g}, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
@@ -709,7 +709,7 @@ func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList, numP
 
 		// Now try all the GPUs
 		for _, p := range numParallelToTry {
-			req.opts.NumCtx = req.origNumCTX * p
+			req.opts.NumCtx = req.origNumCtx * p
 			if ok, estimatedVRAM = llm.PredictServerFit(sgl, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
 				slog.Info("new model will fit in available VRAM, loading", "model", req.model.ModelPath, "library", sgl[0].Library, "parallel", p, "required", format.HumanBytes2(estimatedVRAM))
 				*numParallel = p