routes: structured outputs for gpt-oss (#12460)

Parth Sareen
2025-10-08 19:13:38 -07:00
committed by GitHub
parent 1b91d4dda1
commit 77060d462c
2 changed files with 377 additions and 64 deletions

@@ -1191,4 +1191,238 @@ func TestChatWithPromptEndingInThinkTag(t *testing.T) {
t.Errorf("expected content %q, got %q", "Based on my analysis, the solution is straightforward.", got)
}
})
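	// With thinking enabled and a structured-output format, the server is
	// expected to run an initial completion without the format, then cancel
	// it once thinking ends and re-issue the request with the format applied.
	// This subtest asserts that two-call sequence for a non-streaming request.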
t.Run("structured outputs restart non-stream", func(t *testing.T) {
var (
requestsMu sync.Mutex
requests []llm.CompletionRequest
wg sync.WaitGroup
)
wg.Add(2)
format := json.RawMessage(`{"type":"object","properties":{"answer":{"type":"string"}}}`)
mock.CompletionFn = func(ctx context.Context, r llm.CompletionRequest, fn func(r llm.CompletionResponse)) error {
defer wg.Done()
requestsMu.Lock()
requests = append(requests, r)
callNum := len(requests)
requestsMu.Unlock()
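			// Call 1 emits thinking that spills past </think> into JSON
			// content; call 2 is the restarted, format-constrained completion.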
			switch callNum {
			case 1:
				fn(llm.CompletionResponse{
					Content:            " I am thinking through this problem. </think> {\"answer\":\"42\"}",
					Done:               false,
					PromptEvalCount:    1,
					PromptEvalDuration: 1,
				})
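				// Block until the server cancels this in-flight request to
				// restart it with the format; fail rather than hang forever.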
				select {
				case <-ctx.Done():
					return ctx.Err()
				case <-time.After(time.Second):
					t.Fatalf("timeout waiting for structured outputs cancellation")
					return nil
				}
			case 2:
				fn(llm.CompletionResponse{
					Content:            `{"answer":"42"}`,
					Done:               true,
					DoneReason:         llm.DoneReasonStop,
					PromptEvalCount:    1,
					PromptEvalDuration: 1,
					EvalCount:          1,
					EvalDuration:       1,
				})
				return nil
			default:
				t.Fatalf("unexpected number of completion calls: %d", callNum)
				return nil
			}
		}
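		// Non-streaming chat request with thinking enabled and a JSON schema format.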
		think := true
		streamRequest := false
		w := createRequest(t, s.ChatHandler, api.ChatRequest{
			Model:    "test-thinking",
			Messages: []api.Message{{Role: "user", Content: "Please respond in JSON."}},
			Think:    &api.ThinkValue{Value: think},
			Stream:   &streamRequest,
			Format:   format,
		})
		wg.Wait()
		mock.CompletionFn = nil
		if w.Code != http.StatusOK {
			t.Fatalf("expected status 200, got %d", w.Code)
		}
		if len(requests) != 2 {
			t.Fatalf("expected two completion calls, got %d", len(requests))
		}
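		// The thinking pass must run unconstrained; only the restarted call
		// should carry the original format.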
		if requests[0].Format != nil {
			t.Errorf("expected first completion format to be nil, got %q", requests[0].Format)
		}
		if !bytes.Equal([]byte(format), []byte(requests[1].Format)) {
			t.Errorf("expected second completion format to match original format")
		}
		var resp api.ChatResponse
		if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
			t.Fatal(err)
		}
		if resp.Message.Thinking != "I am thinking through this problem. " {
			t.Errorf("expected thinking %q, got %q", "I am thinking through this problem. ", resp.Message.Thinking)
		}
		if resp.Message.Content != `{"answer":"42"}` {
			t.Errorf("expected content %q, got %q", `{"answer":"42"}`, resp.Message.Content)
		}
		if !resp.Done {
			t.Errorf("expected response to be done")
		}
		if resp.DoneReason != "stop" {
			t.Errorf("expected done reason stop, got %s", resp.DoneReason)
		}
	})
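	// Same restart behavior, but over a streaming response: thinking tokens
	// stream first, and the restarted completion's JSON arrives afterwards.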
t.Run("structured outputs restart streaming", func(t *testing.T) {
var (
requestsMu sync.Mutex
requests []llm.CompletionRequest
wg sync.WaitGroup
)
wg.Add(2)
format := json.RawMessage(`{"type":"object","properties":{"answer":{"type":"string"}}}`)
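		// The mock mirrors the non-stream case: the first call blocks until
		// cancelled, the second returns the final constrained JSON.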
		mock.CompletionFn = func(ctx context.Context, r llm.CompletionRequest, fn func(r llm.CompletionResponse)) error {
			defer wg.Done()
			requestsMu.Lock()
			requests = append(requests, r)
			callNum := len(requests)
			requestsMu.Unlock()
			switch callNum {
			case 1:
				fn(llm.CompletionResponse{
					Content:            " I am thinking through this problem. </think> {\"answer\":\"42\"}",
					Done:               false,
					PromptEvalCount:    1,
					PromptEvalDuration: 1,
				})
				select {
				case <-ctx.Done():
					return ctx.Err()
				case <-time.After(time.Second):
					t.Fatalf("timeout waiting for structured outputs cancellation")
					return nil
				}
			case 2:
				fn(llm.CompletionResponse{
					Content:            `{"answer":"42"}`,
					Done:               true,
					DoneReason:         llm.DoneReasonStop,
					PromptEvalCount:    1,
					PromptEvalDuration: 1,
					EvalCount:          1,
					EvalDuration:       1,
				})
				return nil
			default:
				t.Fatalf("unexpected number of completion calls: %d", callNum)
				return nil
			}
		}
		think := true
		streamRequest := true
		w := createRequest(t, s.ChatHandler, api.ChatRequest{
			Model:    "test-thinking",
			Messages: []api.Message{{Role: "user", Content: "Please respond in JSON."}},
			Think:    &api.ThinkValue{Value: think},
			Stream:   &streamRequest,
			Format:   format,
		})
		wg.Wait()
		mock.CompletionFn = nil
		if w.Code != http.StatusOK {
			t.Fatalf("expected status 200, got %d", w.Code)
		}
		if len(requests) != 2 {
			t.Fatalf("expected two completion calls, got %d", len(requests))
		}
		if requests[0].Format != nil {
			t.Errorf("expected first completion format to be nil, got %q", requests[0].Format)
		}
		if !bytes.Equal([]byte(format), []byte(requests[1].Format)) {
			t.Errorf("expected second completion format to match original format")
		}
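		// Drain the newline-delimited JSON stream until the terminal event.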
		decoder := json.NewDecoder(w.Body)
		var events []api.ChatResponse
		for {
			var event api.ChatResponse
			if err := decoder.Decode(&event); err == io.EOF {
				break
			} else if err != nil {
				t.Fatal(err)
			}
			events = append(events, event)
			if event.Done {
				break
			}
		}
		if len(events) < 2 {
			t.Fatalf("expected at least two streaming events, got %d", len(events))
		}
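		// Thinking should surface in the first event with no content; the
		// final event should carry only the restarted completion's JSON.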
		first := events[0]
		if first.Message.Thinking != "I am thinking through this problem. " {
			t.Errorf("expected first event thinking %q, got %q", "I am thinking through this problem. ", first.Message.Thinking)
		}
		if first.Message.Content != "" {
			t.Errorf("expected first event content to be empty, got %q", first.Message.Content)
		}
		if first.Done {
			t.Error("expected first event to be non-terminal")
		}
		last := events[len(events)-1]
		if last.Message.Thinking != "" {
			t.Errorf("expected final event thinking to be empty, got %q", last.Message.Thinking)
		}
		if last.Message.Content != `{"answer":"42"}` {
			t.Errorf("expected final event content %q, got %q", `{"answer":"42"}`, last.Message.Content)
		}
		if !last.Done {
			t.Error("expected final event to be done")
		}
		if last.DoneReason != "stop" {
			t.Errorf("expected final done reason stop, got %s", last.DoneReason)
		}
	})
}