From 106592820d6d20862c3cbb9a79e0763a22439d9b Mon Sep 17 00:00:00 2001
From: ParthSareen
Date: Thu, 3 Apr 2025 11:02:13 -0700
Subject: [PATCH] llama: fix naming in grammar

---
 llama/grammar.cpp | 18 +++++++++---------
 llama/grammar.h   |  4 ++--
 llm/server.go     |  5 +----
 3 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/llama/grammar.cpp b/llama/grammar.cpp
index a9129f3c1..095908c2f 100644
--- a/llama/grammar.cpp
+++ b/llama/grammar.cpp
@@ -871,7 +871,7 @@ grammar_candidates grammar_reject_candidates_for_stack(
 
     if (stack.empty()) {
         for (const auto & tok : candidates) {
-            if (*tok.code_points != 0 || tok.partial_utf8.n_remain != 0) {
+            if (*tok.code_points != 0 || tok.utf8_state.n_remain != 0) {
                 rejects.push_back(tok);
             }
         }
@@ -887,12 +887,12 @@ grammar_candidates grammar_reject_candidates_for_stack(
         if (*tok.code_points == 0) {
             // reached end of full codepoints in token, reject iff it ended in a partial sequence
             // that cannot satisfy this position in grammar
-            if (tok.partial_utf8.n_remain != 0 &&
-                !grammar_match_partial_char(stack_pos, tok.partial_utf8)) {
+            if (tok.utf8_state.n_remain != 0 &&
+                !grammar_match_partial_char(stack_pos, tok.utf8_state)) {
                 rejects.push_back(tok);
             }
         } else if (grammar_match_char(stack_pos, *tok.code_points).first) {
-            next_candidates.push_back({ tok.index, tok.code_points + 1, tok.partial_utf8 });
+            next_candidates.push_back({ tok.index, tok.code_points + 1, tok.utf8_state });
         } else {
             rejects.push_back(tok);
         }
@@ -910,7 +910,7 @@ grammar_candidates grammar_reject_candidates_for_stack(
 
     auto next_rejects = grammar_reject_candidates(rules, next_stacks, next_candidates);
     for (const auto & tok : next_rejects) {
-        rejects.push_back({ tok.index, tok.code_points - 1, tok.partial_utf8 });
+        rejects.push_back({ tok.index, tok.code_points - 1, tok.utf8_state });
     }
 
     return rejects;
@@ -1123,7 +1123,7 @@ struct grammar * grammar_clone_impl(const struct grammar & g) {
         g.vocab,
         g.rules,
         g.stacks,
-        g.partial_utf8,
+        g.utf8_state,
         g.lazy,
         g.awaiting_trigger,
         g.trigger_buffer,
@@ -1179,7 +1179,7 @@ void grammar_apply_impl(const struct grammar & grammar, llama_token_data_array *
         } else if (piece.empty() || piece[0] == 0) {
             cur_p->data[i].logit = -INFINITY;
         } else {
-            candidates_decoded.push_back(decode_utf8(piece, grammar.partial_utf8));
+            candidates_decoded.push_back(decode_utf8(piece, grammar.utf8_state));
             candidates_grammar.push_back({ i, candidates_decoded.back().first.data(), candidates_decoded.back().second });
         }
     }
@@ -1235,14 +1235,14 @@ void grammar_accept_impl(struct grammar & grammar, llama_token token) {
 
 void grammar_accept_str(struct grammar & grammar, const std::string & piece) {
     // Note terminating 0 in decoded string
-    const auto decoded = decode_utf8(piece, grammar.partial_utf8);
+    const auto decoded = decode_utf8(piece, grammar.utf8_state);
     const auto & code_points = decoded.first;
 
     for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
         grammar_accept(&grammar, *it);
     }
 
-    grammar.partial_utf8 = decoded.second;
+    grammar.utf8_state = decoded.second;
     if (grammar.stacks.empty()) {
         throw std::runtime_error("Unexpected empty grammar stack after accepting piece: " + piece);
     }
diff --git a/llama/grammar.h b/llama/grammar.h
index 3c3d609f2..c7cacc779 100644
--- a/llama/grammar.h
+++ b/llama/grammar.h
@@ -58,7 +58,7 @@ struct partial_utf8 {
 struct grammar_candidate {
     size_t           index;
     const uint32_t * code_points;
-    partial_utf8     partial_utf8;
+    partial_utf8     utf8_state;
 };
 
 using grammar_rule = std::vector<grammar_element>;
@@ -120,7 +120,7 @@ struct grammar {
     grammar_stacks stacks;
 
     // buffer for partially generated UTF-8 sequence from accepted tokens
-    partial_utf8 partial_utf8;
+    partial_utf8 utf8_state;
 
     // lazy grammars wait for trigger words or tokens before constraining the sampling.
     // we still have trigger_tokens for non-lazy grammars to force printing of special trigger tokens.
diff --git a/llm/server.go b/llm/server.go
index 1568d5f8a..80543bbd3 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -655,8 +655,7 @@ string ::=
 )* "\""
 number ::= "-"? ("0" | [1-9] [0-9]*) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
 ws ::= [ \t \n \r]*
-s ::= [ \n \t]
-t ::= [ \t \r]*`
+s ::= [ \n \t]`
 
 const maxBufferSize = 512 * format.KiloByte
 
@@ -694,8 +693,6 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
 		break
 	case `"json"`:
 		req.Grammar = grammarJSON
-		slog.Info("using JSON grammar")
-		slog.Info(req.Grammar)
 	default:
 		if req.Format[0] != '{' {
 			return fmt.Errorf("invalid format: %q; expected \"json\" or a valid JSON Schema object", req.Format)
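
For context on what the renamed field tracks: utf8_state is the decoder state that decode_utf8 carries from one accepted token piece to the next, so a multi-byte UTF-8 character split across two tokens can still be matched against the grammar. Below is a minimal, self-contained C++ sketch of that resumable decoding. It is not the vendored decode_utf8: the n_remain field comes from the diff above, while the accumulator field name ("value"), the struct and helper names, and the exact control flow are assumptions for illustration only.

    // Sketch of resumable UTF-8 decoding across token pieces (illustrative only).
    #include <cstdint>
    #include <string>
    #include <utility>
    #include <vector>

    struct partial_utf8_sketch {
        uint32_t value;    // codepoint bits collected so far (field name assumed)
        int      n_remain; // continuation bytes still expected; -1 marks invalid input
    };

    // Decode one piece, resuming from the state left by the previous piece, and
    // return the decoded codepoints (0-terminated, as noted in grammar_accept_str)
    // together with the state to carry into the next piece.
    static std::pair<std::vector<uint32_t>, partial_utf8_sketch>
    decode_utf8_sketch(const std::string & piece, partial_utf8_sketch state) {
        std::vector<uint32_t> code_points;
        for (unsigned char c : piece) {
            if (state.n_remain > 0) {              // expected continuation byte
                state.value = (state.value << 6) | (c & 0x3F);
                if (--state.n_remain == 0) {
                    code_points.push_back(state.value);
                }
            } else if (c < 0x80) {                 // ASCII
                code_points.push_back(c);
            } else if ((c & 0xE0) == 0xC0) {       // 2-byte sequence lead
                state = { uint32_t(c & 0x1F), 1 };
            } else if ((c & 0xF0) == 0xE0) {       // 3-byte sequence lead
                state = { uint32_t(c & 0x0F), 2 };
            } else if ((c & 0xF8) == 0xF0) {       // 4-byte sequence lead
                state = { uint32_t(c & 0x07), 3 };
            } else {                               // stray continuation or invalid byte
                state = { 0, -1 };
            }
        }
        code_points.push_back(0);                  // terminating 0
        return { code_points, state };
    }

A piece that ends mid-character leaves n_remain > 0 in the returned state, which is exactly the condition the patched grammar_reject_candidates_for_stack checks via tok.utf8_state.n_remain.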