api: structured outputs - chat endpoint (#7900)

Adds structured outputs to the chat endpoint.

Co-authored-by: Michael Yang <mxyng@pm.me>
Co-authored-by: Hieu Nguyen <hieunguyen1053@outlook.com>
2025-11-11 14:27:36 +01:00 · 2024-12-04 16:31:19 -08:00
parent eb8366d658
commit 630e7dc6ff
10 changed files with 180 additions and 25 deletions
--- a/llama/sampling_ext.cpp
+++ b/llama/sampling_ext.cpp
@@ -1,11 +1,13 @@
 // TODO: this is a temporary wrapper to allow calling C++ code from CGo
 #include "sampling.h"
 #include "sampling_ext.h"
+#include "json-schema-to-grammar.h"

 struct gpt_sampler *gpt_sampler_cinit(
    const struct llama_model *model, struct gpt_sampler_cparams *params)
 {
-    try {
+    try
+    {
        gpt_sampler_params sparams;
        sparams.top_k = params->top_k;
        sparams.top_p = params->top_p;
@@ -24,7 +26,9 @@ struct gpt_sampler *gpt_sampler_cinit(
        sparams.seed = params->seed;
        sparams.grammar = params->grammar;
        return gpt_sampler_init(model, sparams);
-    } catch (const std::exception & err) {
+    }
+    catch (const std::exception &err)
+    {
        return nullptr;
    }
 }
@@ -54,3 +58,24 @@ void gpt_sampler_caccept(
 {
    gpt_sampler_accept(sampler, id, apply_grammar);
 }
+
+/**
+ * C-callable wrapper that parses a JSON schema, converts it with
+ * json_schema_to_grammar, and copies the resulting grammar text into a
+ * caller-provided buffer.
+ *
+ * @param json_schema NUL-terminated JSON schema text. A null pointer is
+ *                    treated as a conversion failure.
+ * @param grammar     Output buffer of at least max_len bytes.
+ * @param max_len     Capacity of grammar in bytes, including room for the
+ *                    NUL terminator.
+ * @return Number of characters written to grammar, not counting the NUL
+ *         terminator. Returns 0 when grammar is null, max_len is 0, or
+ *         parsing/conversion throws; whenever the buffer is usable it then
+ *         holds the empty string.
+ *
+ * Output longer than max_len - 1 characters is truncated, so a return value
+ * of max_len - 1 may mean the grammar did not fit. When grammar is non-null
+ * and max_len > 0 the buffer is always NUL-terminated on return.
+ */
+int schema_to_grammar(const char *json_schema, char *grammar, size_t max_len)
+{
+    // Nothing can be written safely here. Bailing out early also keeps
+    // max_len - 1 below from wrapping around to SIZE_MAX.
+    if (grammar == nullptr || max_len == 0)
+    {
+        return 0;
+    }
+
+    // Start from a valid empty string so every failure path leaves one behind.
+    grammar[0] = '\0';
+    if (json_schema == nullptr)
+    {
+        return 0;
+    }
+
+    try
+    {
+        nlohmann::json schema = nlohmann::json::parse(json_schema);
+        const std::string grammar_str = json_schema_to_grammar(schema);
+        size_t len = grammar_str.length();
+        if (len >= max_len)
+        {
+            // Reserve the last byte of the buffer for the terminator.
+            len = max_len - 1;
+        }
+        // std::string::copy, like strncpy with a count <= the source length,
+        // does not append a terminator, so add it explicitly.
+        grammar_str.copy(grammar, len);
+        grammar[len] = '\0';
+        return static_cast<int>(len);
+    }
+    catch (const std::exception &)
+    {
+        // Exceptions must not propagate to the C/CGo caller; report failure
+        // as an empty grammar.
+        grammar[0] = '\0';
+        return 0;
+    }
+}