diff --git a/llm/server.go b/llm/server.go index 8fdf028d3c..2c4c367e4f 100644 --- a/llm/server.go +++ b/llm/server.go @@ -31,6 +31,7 @@ import ( "github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/format" "github.com/ollama/ollama/fs/ggml" + "github.com/ollama/ollama/harmony" "github.com/ollama/ollama/llama" "github.com/ollama/ollama/logutil" "github.com/ollama/ollama/ml" @@ -1331,7 +1332,9 @@ type CompletionRequest struct { Images []ImageData Options *api.Options - Grammar string // set before sending the request to the subprocess + Grammar string // set before sending the request to the subprocess + FunctionNameMap *harmony.FunctionNameMap + PrefillContent *bool } // DoneReason represents the reason why a completion response is done @@ -1484,7 +1487,7 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu return fmt.Errorf("error unmarshalling llm prediction response: %v", err) } switch { - case strings.TrimSpace(c.Content) == lastToken: + case lastToken != "" && (strings.TrimSpace(c.Content) == lastToken || strings.TrimSpace(c.Thinking) == lastToken): tokenRepeat++ default: lastToken = strings.TrimSpace(c.Content) @@ -1497,14 +1500,14 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu return ctx.Err() } - if c.Content != "" || c.Thinking != "" || len(c.ToolCalls) > 0 { - fn(c) - } - if c.Done { fn(c) return nil } + + if c.Content != "" || c.Thinking != "" || len(c.ToolCalls) > 0 { + fn(c) + } } }