llama: test key order preservation in schema_to_grammar (#8078)

This change adds a test to catch a regression in schema_to_grammar where
the order of keys in the JSON schema is not preserved in the generated
grammar, which is critical for step-by-step reasoning.
This commit is contained in:
Parth Sareen 2024-12-18 19:44:50 -08:00 committed by GitHub
parent a72f2dce45
commit 290cf2040a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 107 additions and 76 deletions

View File

@ -0,0 +1,107 @@
package grammar
import (
"bufio"
"bytes"
"strings"
"testing"
"github.com/ollama/ollama/llama"
)
// https://github.com/ollama/ollama/issues/7978
const issue7978JSONSchema = `{
"type": "object",
"properties": {
"steps": {
"type": "array",
"items": {
"type": "object",
"properties": {
"explanation": { "type": "string" },
"output": { "type": "string" },
"nested": {
"type": "object",
"properties": {
"deep": { "type": "string" }
}
}
},
"required": ["explanation", "output"],
"additionalProperties": false
}
},
"final_answer": { "type": "string" },
"01_numbered_key": { "type": "string" },
"numbers": {
"type": "array",
"items": { "type": "number" }
},
"booleans": {
"type": "array",
"items": { "type": "boolean" }
},
"mixed": {
"type": "array",
"items": {
"oneOf": [
{ "type": "string" },
{ "type": "number" },
{ "type": "boolean" }
]
}
}
},
"required": ["steps", "final_answer"],
"additionalProperties": false
}`
func TestIssue7978(t *testing.T) {
g := llama.SchemaToGrammar([]byte(issue7978JSONSchema))
if g == nil {
t.Fatal("failed to convert JSON schema to grammar")
}
t.Logf("grammar:\n%s", g)
t.Log()
var got string
s := bufio.NewScanner(bytes.NewReader(g))
for s.Scan() {
line := strings.TrimSpace(s.Text())
step, _, _ := strings.Cut(line, " ::= ")
step = strings.TrimSpace(step)
if step == "root" {
got = line
}
}
want := `root ::= "{" space steps-kv "," space final-answer-kv ( "," space ( 01-numbered-key-kv 01-numbered-key-rest | numbers-kv numbers-rest | booleans-kv booleans-rest | mixed-kv ) )? "}" space`
if got != want {
t.Errorf("root =\n%qwant:\n%q", got, want)
}
}
func TestSchemaToGrammer(t *testing.T) {
cases := []struct {
schema string
prefix []byte // nil is check as nil
}{
{`invalid`, nil},
// Simple heuristic/smoke test
{`{"type":"object"}`, []byte("root ::= object")},
}
for _, c := range cases {
t.Run("x", func(t *testing.T) {
g := llama.SchemaToGrammar([]byte(c.schema))
if c.prefix == nil && g != nil {
t.Fatalf("grammar = %v, want nil", g)
}
if !bytes.HasPrefix(g, c.prefix) {
t.Errorf("grammar = %q, want %q", g, c.prefix)
}
})
}
}

View File

@ -1,76 +0,0 @@
package llama
import (
"bufio"
"bytes"
"strings"
"testing"
)
// https://github.com/ollama/ollama/issues/7978
const issue7978JSONSchema = `{
"type": "object",
"properties": {
"steps": {
"type": "array",
"items": {
"type": "object",
"properties": {
"explanation": { "type": "string" },
"output": { "type": "string" }
},
"required": ["explanation", "output"],
"additionalProperties": false
}
},
"final_answer": { "type": "string" }
},
"required": ["steps", "final_answer"],
"additionalProperties": false
}`
func TestIssue7978(t *testing.T) {
g := SchemaToGrammar([]byte(issue7978JSONSchema))
if g == nil {
t.Fatal("failed to convert JSON schema to grammar")
}
t.Logf("grammar:\n%s", g)
t.Log()
var sawSteps bool
s := bufio.NewScanner(bytes.NewReader(g))
for s.Scan() {
line := s.Text()
if strings.Contains(line, "steps") {
sawSteps = true
}
if strings.Contains(line, "final-answer") && !sawSteps {
t.Error("expected 'steps' before 'final-answer'")
}
}
}
func TestSchemaToGrammer(t *testing.T) {
cases := []struct {
schema string
prefix []byte // nil is check as nil
}{
{`invalid`, nil},
// Simple heuristic/smoke test
{`{"type":"object"}`, []byte("root ::= object")},
}
for _, c := range cases {
t.Run("x", func(t *testing.T) {
g := SchemaToGrammar([]byte(c.schema))
if c.prefix == nil && g != nil {
t.Fatalf("grammar = %v, want nil", g)
}
if !bytes.HasPrefix(g, c.prefix) {
t.Errorf("grammar = %q, want %q", g, c.prefix)
}
})
}
}