Mirror of https://github.com/ollama/ollama.git (synced 2025-04-12 13:49:43 +02:00)
ollamarunner: Don't truncate a SameBatch
When truncating inputs to the context window at the beginning of a sequence, we remove the minimum amount possible. However, this may cause the cut to land in the middle of a set of inputs that the model specified should not be split up. To avoid this, we need to remove the rest of the partial batch as well.
parent 9876c9faa4
commit 493385eb3e
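Before the diff, a minimal sketch of the new truncation rule may help. The toyInput type and truncateStart helper below are invented for illustration and are not the runner's actual types, and the error checks from the real code are omitted; the loop, though, follows the logic added in this commit: start from the minimal cut point numKeep + discard, and if that point lands inside a group that an earlier input marked with SameBatch, push it past the end of the group so the whole partial group is dropped rather than split.

package main

import "fmt"

// toyInput is a stand-in for the runner's input type; only the SameBatch
// field matters here. SameBatch == n means the next n inputs must be
// processed in the same batch as this input.
type toyInput struct {
    SameBatch int
}

// truncateStart mirrors the loop this commit adds: starting from the
// minimal cut point numKeep+discard, it advances the cut past any
// SameBatch group it would otherwise split, so the partial group is
// dropped entirely rather than broken up.
func truncateStart(inputs []toyInput, numKeep, discard int) int {
    promptStart := numKeep + discard

    sameBatch := 0
    for i, inp := range inputs {
        if sameBatch > 0 {
            sameBatch--

            if promptStart == i {
                promptStart++ // the cut would split the group; push it forward
            }
        } else if promptStart == i {
            break
        }

        if inp.SameBatch != 0 {
            sameBatch = inp.SameBatch
        }
    }

    return promptStart
}

func main() {
    // 12 inputs with a context window of 8 and numKeep = 2: discard = 4,
    // so the minimal cut would keep inputs[6:]. Input 5 opens a SameBatch
    // group covering indices 6-8, so the cut is pushed to index 9 and the
    // whole partial group is discarded.
    inputs := make([]toyInput, 12)
    inputs[5].SameBatch = 3

    numKeep, discard := 2, 4
    promptStart := truncateStart(inputs, numKeep, discard)
    fmt.Println(promptStart) // 9

    // Assemble the truncated prompt the way the diff does: the numKeep
    // prefix plus everything from the adjusted cut point onward.
    newInputs := append([]toyInput{}, inputs[:numKeep]...)
    newInputs = append(newInputs, inputs[promptStart:]...)
    fmt.Println(len(newInputs)) // 5 inputs kept instead of the minimal 8
}

The cost is that slightly more than the minimum may be discarded, which is exactly the trade-off the commit message describes: removing the rest of the partial batch is preferred over splitting inputs the model said must stay together.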
@@ -225,6 +225,8 @@ func countCommonPrefix(a []input.Input, b []input.Input) int32 {
     return count
 }
 
+// TODO(jessegross): If we need to reprocess the inputs we should ensure that
+// we don't split up a SameBatch
 func (c *InputCache) ShiftDiscard(inputLen int32, numKeep int32) int32 {
     targetFree := (c.numCtx - numKeep) / 2
     targetFree = max(targetFree, 1)
@@ -115,16 +115,41 @@ func (s *Server) NewSequence(prompt string, images []llm.ImageData, params NewSe
         params.numKeep = int32(len(inputs))
     }
 
+    // TODO(jessegross): We should ensure that we always leave minBatch of context space to shift,
+    // otherwise we might truncate or split the batch against the model's wishes
+
     // Ensure that at least 1 input can be discarded during shift
     params.numKeep = min(params.numKeep, s.cache.numCtx-1)
 
     if int32(len(inputs)) > s.cache.numCtx {
         discard := int32(len(inputs)) - s.cache.numCtx
+        promptStart := params.numKeep + discard
+
+        // If we need to truncate in the middle of an unbreakable batch, remove the entire batch
+        sameBatch := 0
+        for i, inp := range inputs {
+            if sameBatch > 0 {
+                sameBatch--
+
+                if promptStart == int32(i) {
+                    promptStart++
+                }
+            } else if promptStart == int32(i) {
+                break
+            }
+
+            if inp.SameBatch != 0 {
+                if int32(i) < params.numKeep {
+                    return nil, fmt.Errorf("SameBatch may not be specified within numKeep (index: %v numKeep: %v SameBatch: %v)", i, params.numKeep, inp.SameBatch)
+                }
+
+                sameBatch = inp.SameBatch
+            }
+        }
+
+        if promptStart >= int32(len(inputs)) {
+            return nil, errors.New("entire prompt removed by truncation")
+        }
+
         newInputs := inputs[:params.numKeep]
-        newInputs = append(newInputs, inputs[params.numKeep+discard:]...)
+        newInputs = append(newInputs, inputs[promptStart:]...)
 
         slog.Warn("truncating input prompt", "limit", s.cache.numCtx, "prompt", len(inputs), "keep", params.numKeep, "new", len(newInputs))
         inputs = newInputs