From 05cebf1f21d5ac1f24c2baeb1cd1802d2873086c Mon Sep 17 00:00:00 2001
From: ParthSareen
Date: Fri, 22 Aug 2025 15:40:32 -0700
Subject: [PATCH] server: update completion request signature and update token repeat

---
 llm/server.go | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/llm/server.go b/llm/server.go
index 8fdf028d3c..2c4c367e4f 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -31,6 +31,7 @@ import (
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/fs/ggml"
+	"github.com/ollama/ollama/harmony"
 	"github.com/ollama/ollama/llama"
 	"github.com/ollama/ollama/logutil"
 	"github.com/ollama/ollama/ml"
@@ -1331,7 +1332,9 @@ type CompletionRequest struct {
 	Images  []ImageData
 	Options *api.Options
 
-	Grammar string // set before sending the request to the subprocess
+	Grammar         string // set before sending the request to the subprocess
+	FunctionNameMap *harmony.FunctionNameMap
+	PrefillContent  *bool
 }
 
 // DoneReason represents the reason why a completion response is done
@@ -1484,7 +1487,7 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
 				return fmt.Errorf("error unmarshalling llm prediction response: %v", err)
 			}
 			switch {
-			case strings.TrimSpace(c.Content) == lastToken:
+			case lastToken != "" && (strings.TrimSpace(c.Content) == lastToken || strings.TrimSpace(c.Thinking) == lastToken):
 				tokenRepeat++
 			default:
 				lastToken = strings.TrimSpace(c.Content)
@@ -1497,14 +1500,14 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
 				return ctx.Err()
 			}
 
-			if c.Content != "" || c.Thinking != "" || len(c.ToolCalls) > 0 {
-				fn(c)
-			}
-
 			if c.Done {
 				fn(c)
 				return nil
 			}
+
+			if c.Content != "" || c.Thinking != "" || len(c.ToolCalls) > 0 {
+				fn(c)
+			}
 		}
 	}
 