diff --git a/llm/server.go b/llm/server.go
index 8fdf028d3c..2c4c367e4f 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -31,6 +31,7 @@ import (
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/fs/ggml"
+	"github.com/ollama/ollama/harmony"
 	"github.com/ollama/ollama/llama"
 	"github.com/ollama/ollama/logutil"
 	"github.com/ollama/ollama/ml"
@@ -1331,7 +1332,9 @@ type CompletionRequest struct {
 	Images  []ImageData
 	Options *api.Options
 
-	Grammar string // set before sending the request to the subprocess
+	Grammar         string // set before sending the request to the subprocess
+	FunctionNameMap *harmony.FunctionNameMap
+	PrefillContent  *bool
 }
 
 // DoneReason represents the reason why a completion response is done
@@ -1484,7 +1487,7 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
 				return fmt.Errorf("error unmarshalling llm prediction response: %v", err)
 			}
 			switch {
-			case strings.TrimSpace(c.Content) == lastToken:
+			case lastToken != "" && (strings.TrimSpace(c.Content) == lastToken || strings.TrimSpace(c.Thinking) == lastToken):
 				tokenRepeat++
 			default:
 				lastToken = strings.TrimSpace(c.Content)
@@ -1497,14 +1500,14 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
 				return ctx.Err()
 			}
 
-			if c.Content != "" || c.Thinking != "" || len(c.ToolCalls) > 0 {
-				fn(c)
-			}
-
 			if c.Done {
 				fn(c)
 				return nil
 			}
+
+			if c.Content != "" || c.Thinking != "" || len(c.ToolCalls) > 0 {
+				fn(c)
+			}
 		}
 	}