From 499ae7311fd26cb4e655ebea69712de3e242f629 Mon Sep 17 00:00:00 2001
From: Jesse Gross
Date: Fri, 9 May 2025 16:51:47 -0700
Subject: [PATCH] ollamarunner: Base cached tokens on current prompt

When we restore a sequence from the cache, we split the prompt into
the already used tokens (stored in the cache) and new tokens that
need to be processed. Currently, the references to the used tokens
are coming from the stored previous sequence.

However, even though we know that the used tokens are semantically
equivalent to the prefix of the prompt, tokens can contain pointers
which are no longer valid. As a result, it is better to get the
used tokens from the prompt, which has currently valid pointers.

This doesn't currently have any impact because it isn't possible
to reuse the pointers (which are tensors) anyways. However, it
becomes an issue once we can.
---
 runner/llamarunner/cache.go  | 2 +-
 runner/ollamarunner/cache.go | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/runner/llamarunner/cache.go b/runner/llamarunner/cache.go
index 2e55b09dc..2e273e69c 100644
--- a/runner/llamarunner/cache.go
+++ b/runner/llamarunner/cache.go
@@ -104,8 +104,8 @@ func (c *InputCache) LoadCacheSlot(prompt []input, cachePrompt bool) (*InputCach
 	slog.Debug("loading cache slot", "id", slot.Id, "cache", len(slot.Inputs), "prompt", len(prompt),
 		"used", numPast, "remaining", len(prompt)-numPast)
 
+	slot.Inputs = prompt[:numPast]
 	prompt = prompt[numPast:]
-	slot.Inputs = slot.Inputs[:numPast]
 
 	return slot, prompt, nil
 }
diff --git a/runner/ollamarunner/cache.go b/runner/ollamarunner/cache.go
index 2138d7988..43880a41b 100644
--- a/runner/ollamarunner/cache.go
+++ b/runner/ollamarunner/cache.go
@@ -136,8 +136,8 @@ func (c *InputCache) LoadCacheSlot(prompt []input.Input) (*InputCacheSlot, []inp
 	slog.Debug("loading cache slot", "id", slot.Id, "cache", len(slot.Inputs), "prompt", len(prompt),
 		"used", numPast, "remaining", int32(len(prompt))-numPast)
 
+	slot.Inputs = prompt[:numPast]
 	prompt = prompt[numPast:]
-	slot.Inputs = slot.Inputs[:numPast]
 
 	return slot, prompt, nil
 }