mirror of
https://github.com/ollama/ollama.git
synced 2025-05-02 23:50:24 +02:00
This package provides a way to convert JSON schemas to equivalent EBNF. It is intended to be a replacement for llama.cpp's schema_to_grammar. This is still an early version and does not yet support all JSON schema features. The to-do list includes: - minimum/maximum constraints on integer types - minLength/maxLength constraints on string types - defs and refs
228 lines
4.7 KiB
Go
228 lines
4.7 KiB
Go
package grammar
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/json"
|
|
"fmt"
|
|
"iter"
|
|
"strconv"
|
|
|
|
"github.com/ollama/ollama/grammar/jsonschema"
|
|
)
|
|
|
|
// jsonTerms is the fixed grammar preamble that FromSchema writes to its
// output before any schema-specific rule. It declares the generic JSON rule
// names (object, array, string, number, integer, boolean, null, value, ...)
// that the generated rules refer to.
const jsonTerms = `
|
|
# Unicode
|
|
#
|
|
# Unicode characters can be specified directly in the grammar, for example
|
|
# hiragana ::= [ぁ-ゟ], or with escapes: 8-bit (\xXX), 16-bit (\uXXXX) or 32-bit
|
|
# (\UXXXXXXXX).
|
|
unicode ::= \x{hex}{2} | \u{hex}{4} | \U{hex}{8}
|
|
|
|
# JSON grammar from RFC 7159
|
|
null ::= "null"
|
|
object ::= "{" (kv ("," kv)*)? "}"
|
|
array ::= "[" (value ("," value)*)? "]"
|
|
kv ::= string ":" value
|
|
integer ::= "0" | [1-9] [0-9]*
|
|
number ::= "-"? integer frac? exp?
|
|
frac ::= "." [0-9]+
|
|
exp ::= ("e" | "E") ("+" | "-") [0-9]+
|
|
string ::= "\"" char* "\""
|
|
escape ::= ["/" | "b" | "f" | "n" | "r" | "t" | unicode]
|
|
char ::= [^"\\] | escape
|
|
space ::= (" " | "\t" | "\n" | "\r")*
|
|
hex ::= [0-9] | [a-f] | [A-F]
|
|
boolean ::= "true" | "false"
|
|
value ::= object | array | string | number | boolean | "null"
|
|
|
|
# User-defined
|
|
`
|
|
|
|
// FromSchema generates a grammar from a JSON schema.
|
|
func FromSchema(buf []byte, jsonSchema []byte) ([]byte, error) {
|
|
var s *jsonschema.Schema
|
|
if err := json.Unmarshal(jsonSchema, &s); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var g builder
|
|
|
|
// "root" is the only rule that is guaranteed to exist, so we start
|
|
// with its length for padding, and then adjust it as we go.
|
|
g.pad = len("root")
|
|
for id := range dependencies("root", s) {
|
|
g.pad = max(g.pad, len(id))
|
|
}
|
|
|
|
g.b.WriteString(jsonTerms)
|
|
|
|
ids := make(map[*jsonschema.Schema]string)
|
|
for id, s := range dependencies("root", s) {
|
|
ids[s] = id
|
|
g.define(id)
|
|
if err := fromSchema(&g, ids, s); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
g.define("root")
|
|
if err := fromSchema(&g, ids, s); err != nil {
|
|
return nil, err
|
|
}
|
|
g.define("") // finalize the last rule
|
|
return g.b.Bytes(), nil
|
|
}
|
|
|
|
func fromSchema(g *builder, ids map[*jsonschema.Schema]string, s *jsonschema.Schema) error {
|
|
switch typ := s.EffectiveType(); typ {
|
|
case "array":
|
|
if len(s.PrefixItems) == 0 && s.Items == nil {
|
|
g.u("array")
|
|
} else {
|
|
g.q("[")
|
|
for i, s := range s.PrefixItems {
|
|
if i > 0 {
|
|
g.q(",")
|
|
}
|
|
g.u(ids[s])
|
|
}
|
|
if s.Items != nil {
|
|
g.u("(")
|
|
if len(s.PrefixItems) > 0 {
|
|
g.q(",")
|
|
}
|
|
g.u(ids[s.Items])
|
|
g.u(")*")
|
|
}
|
|
g.q("]")
|
|
}
|
|
case "object":
|
|
if len(s.Properties) == 0 {
|
|
g.u("object")
|
|
} else {
|
|
g.q("{")
|
|
for i, p := range s.Properties {
|
|
name := ids[p]
|
|
if i > 0 {
|
|
g.q(",")
|
|
}
|
|
g.q(p.Name)
|
|
g.q(":")
|
|
g.u(name)
|
|
}
|
|
g.q("}")
|
|
}
|
|
case "number":
|
|
buildConstrainedNumber(g, s)
|
|
case "string":
|
|
if len(s.Enum) == 0 {
|
|
g.u("string")
|
|
} else {
|
|
g.u("(")
|
|
for i, e := range s.Enum {
|
|
if i > 0 {
|
|
g.q("|")
|
|
}
|
|
g.q(string(e))
|
|
}
|
|
g.u(")")
|
|
}
|
|
case "boolean", "value", "null", "integer":
|
|
g.u(typ)
|
|
default:
|
|
return fmt.Errorf("%s: unsupported type %q", s.Name, typ)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// dependencies returns a sequence of all child dependencies of the schema in
|
|
// post-order.
|
|
//
|
|
// The first value is the id/pointer to the dependency, and the second value
|
|
// is the schema.
|
|
func dependencies(id string, s *jsonschema.Schema) iter.Seq2[string, *jsonschema.Schema] {
|
|
return func(yield func(string, *jsonschema.Schema) bool) {
|
|
for i, p := range s.Properties {
|
|
id := fmt.Sprintf("%s_%d", id, i)
|
|
for did, d := range dependencies(id, p) {
|
|
if !yield(did, d) {
|
|
return
|
|
}
|
|
}
|
|
if !yield(id, p) {
|
|
return
|
|
}
|
|
}
|
|
for i, p := range s.PrefixItems {
|
|
id := fmt.Sprintf("tuple_%d", i)
|
|
for did, d := range dependencies(id, p) {
|
|
id := fmt.Sprintf("%s_%s", id, did)
|
|
if !yield(id, d) {
|
|
return
|
|
}
|
|
}
|
|
if !yield(id, p) {
|
|
return
|
|
}
|
|
}
|
|
if s.Items != nil {
|
|
id := fmt.Sprintf("%s_tuple_%d", id, len(s.PrefixItems))
|
|
for did, d := range dependencies(id, s.Items) {
|
|
if !yield(did, d) {
|
|
return
|
|
}
|
|
}
|
|
if !yield(id, s.Items) {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// builder accumulates the text of a grammar one rule at a time.
type builder struct {
	b     bytes.Buffer // grammar text written so far
	pad   int          // minimum width rule names are padded to by define
	rules int          // number of rules started by define
	items int          // reset by define; consulted by q and u for spacing
}

// define closes the rule in progress, if there is one, by writing its
// terminator. When name is non-empty it then opens a new rule with that
// name, left-aligned in a column of width pad; when name is empty nothing
// more is written.
func (g *builder) define(name string) {
	if g.rules > 0 {
		g.b.WriteString(";\n")
	}
	if name != "" {
		fmt.Fprintf(&g.b, "% -*s", g.pad, name)
		g.b.WriteString(" ::=")
		g.rules++
		g.items = 0
	}
}

// gap writes the spacing that precedes every item of a rule: one space, plus
// an additional one when items is positive.
func (g *builder) gap() {
	if g.items > 0 {
		g.b.WriteByte(' ')
	}
	g.b.WriteByte(' ')
}

// q appends a terminal to the current rule; s is written as a quoted string.
func (g *builder) q(s string) {
	g.gap()
	g.b.WriteString(strconv.Quote(s))
}

// u appends a non-terminal to the current rule; s is written verbatim.
func (g *builder) u(s string) {
	g.gap()
	g.b.WriteString(s)
}
|
|
|
|
func buildConstrainedNumber(b *builder, s *jsonschema.Schema) {
|
|
if s.Minimum == 0 && s.Maximum == 0 {
|
|
b.u("TODO")
|
|
} else {
|
|
b.u("number")
|
|
}
|
|
}
|