From 1d09e0143142616bad0841325e88b6934966493b Mon Sep 17 00:00:00 2001 From: ParthSareen Date: Fri, 22 Aug 2025 15:40:32 -0700 Subject: [PATCH] server: update completion request signature and update token repeat --- llm/server.go | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/llm/server.go b/llm/server.go index ed5663bd9e..46125bc9ca 100644 --- a/llm/server.go +++ b/llm/server.go @@ -31,6 +31,7 @@ import ( "github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/format" "github.com/ollama/ollama/fs/ggml" + "github.com/ollama/ollama/harmony" "github.com/ollama/ollama/llama" "github.com/ollama/ollama/logutil" "github.com/ollama/ollama/ml" @@ -1347,7 +1348,9 @@ type CompletionRequest struct { Images []ImageData Options *api.Options - Grammar string // set before sending the request to the subprocess + Grammar string // set before sending the request to the subprocess + FunctionNameMap *harmony.FunctionNameMap + PrefillContent *bool } // DoneReason represents the reason why a completion response is done @@ -1500,7 +1503,7 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu return fmt.Errorf("error unmarshalling llm prediction response: %v", err) } switch { - case strings.TrimSpace(c.Content) == lastToken: + case lastToken != "" && (strings.TrimSpace(c.Content) == lastToken || strings.TrimSpace(c.Thinking) == lastToken): tokenRepeat++ default: lastToken = strings.TrimSpace(c.Content) @@ -1513,14 +1516,14 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu return ctx.Err() } - if c.Content != "" || c.Thinking != "" || len(c.ToolCalls) > 0 { - fn(c) - } - if c.Done { fn(c) return nil } + + if c.Content != "" || c.Thinking != "" || len(c.ToolCalls) > 0 { + fn(c) + } } }