routes: structured outputs for gpt-oss (#12460)
@@ -1191,4 +1191,238 @@ func TestChatWithPromptEndingInThinkTag(t *testing.T) {
			t.Errorf("expected content %q, got %q", "Based on my analysis, the solution is straightforward.", got)
		}
	})

	t.Run("structured outputs restart non-stream", func(t *testing.T) {
		var (
			requestsMu sync.Mutex
			requests   []llm.CompletionRequest
			wg         sync.WaitGroup
		)

		wg.Add(2)

		format := json.RawMessage(`{"type":"object","properties":{"answer":{"type":"string"}}}`)

		mock.CompletionFn = func(ctx context.Context, r llm.CompletionRequest, fn func(r llm.CompletionResponse)) error {
			defer wg.Done()

			requestsMu.Lock()
			requests = append(requests, r)
			callNum := len(requests)
			requestsMu.Unlock()

			switch callNum {
			case 1:
				fn(llm.CompletionResponse{
					Content:            " I am thinking through this problem. </think> {\"answer\":\"42\"}",
					Done:               false,
					PromptEvalCount:    1,
					PromptEvalDuration: 1,
				})

				select {
				case <-ctx.Done():
					return ctx.Err()
				case <-time.After(time.Second):
					t.Fatalf("timeout waiting for structured outputs cancellation")
					return nil
				}
			case 2:
				fn(llm.CompletionResponse{
					Content:            `{"answer":"42"}`,
					Done:               true,
					DoneReason:         llm.DoneReasonStop,
					PromptEvalCount:    1,
					PromptEvalDuration: 1,
					EvalCount:          1,
					EvalDuration:       1,
				})
				return nil
			default:
				t.Fatalf("unexpected number of completion calls: %d", callNum)
				return nil
			}
		}

		think := true
		streamRequest := false
		w := createRequest(t, s.ChatHandler, api.ChatRequest{
			Model:    "test-thinking",
			Messages: []api.Message{{Role: "user", Content: "Please respond in JSON."}},
			Think:    &api.ThinkValue{Value: think},
			Stream:   &streamRequest,
			Format:   format,
		})

		wg.Wait()
		mock.CompletionFn = nil

		if w.Code != http.StatusOK {
			t.Fatalf("expected status 200, got %d", w.Code)
		}

		if len(requests) != 2 {
			t.Fatalf("expected two completion calls, got %d", len(requests))
		}

		if requests[0].Format != nil {
			t.Errorf("expected first completion format to be nil, got %q", requests[0].Format)
		}

		if !bytes.Equal([]byte(format), []byte(requests[1].Format)) {
			t.Errorf("expected second completion format to match original format")
		}

		var resp api.ChatResponse
		if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
			t.Fatal(err)
		}

		if resp.Message.Thinking != "I am thinking through this problem. " {
			t.Errorf("expected thinking %q, got %q", "I am thinking through this problem. ", resp.Message.Thinking)
		}

		if resp.Message.Content != `{"answer":"42"}` {
			t.Errorf("expected content %q, got %q", `{"answer":"42"}`, resp.Message.Content)
		}

		if !resp.Done {
			t.Errorf("expected response to be done")
		}

		if resp.DoneReason != "stop" {
			t.Errorf("expected done reason stop, got %s", resp.DoneReason)
		}
	})

	t.Run("structured outputs restart streaming", func(t *testing.T) {
		var (
			requestsMu sync.Mutex
			requests   []llm.CompletionRequest
			wg         sync.WaitGroup
		)

		wg.Add(2)

		format := json.RawMessage(`{"type":"object","properties":{"answer":{"type":"string"}}}`)

		mock.CompletionFn = func(ctx context.Context, r llm.CompletionRequest, fn func(r llm.CompletionResponse)) error {
			defer wg.Done()

			requestsMu.Lock()
			requests = append(requests, r)
			callNum := len(requests)
			requestsMu.Unlock()

			switch callNum {
			case 1:
				fn(llm.CompletionResponse{
					Content:            " I am thinking through this problem. </think> {\"answer\":\"42\"}",
					Done:               false,
					PromptEvalCount:    1,
					PromptEvalDuration: 1,
				})

				select {
				case <-ctx.Done():
					return ctx.Err()
				case <-time.After(time.Second):
					t.Fatalf("timeout waiting for structured outputs cancellation")
					return nil
				}
			case 2:
				fn(llm.CompletionResponse{
					Content:            `{"answer":"42"}`,
					Done:               true,
					DoneReason:         llm.DoneReasonStop,
					PromptEvalCount:    1,
					PromptEvalDuration: 1,
					EvalCount:          1,
					EvalDuration:       1,
				})
				return nil
			default:
				t.Fatalf("unexpected number of completion calls: %d", callNum)
				return nil
			}
		}

		think := true
		streamRequest := true
		w := createRequest(t, s.ChatHandler, api.ChatRequest{
			Model:    "test-thinking",
			Messages: []api.Message{{Role: "user", Content: "Please respond in JSON."}},
			Think:    &api.ThinkValue{Value: think},
			Stream:   &streamRequest,
			Format:   format,
		})

		wg.Wait()
		mock.CompletionFn = nil

		if w.Code != http.StatusOK {
			t.Fatalf("expected status 200, got %d", w.Code)
		}

		if len(requests) != 2 {
			t.Fatalf("expected two completion calls, got %d", len(requests))
		}

		if requests[0].Format != nil {
			t.Errorf("expected first completion format to be nil, got %q", requests[0].Format)
		}

		if !bytes.Equal([]byte(format), []byte(requests[1].Format)) {
			t.Errorf("expected second completion format to match original format")
		}

		decoder := json.NewDecoder(w.Body)
		var events []api.ChatResponse
		for {
			var event api.ChatResponse
			if err := decoder.Decode(&event); err == io.EOF {
				break
			} else if err != nil {
				t.Fatal(err)
			}
			events = append(events, event)
			if event.Done {
				break
			}
		}

		if len(events) < 2 {
			t.Fatalf("expected at least two streaming events, got %d", len(events))
		}

		first := events[0]
		if first.Message.Thinking != "I am thinking through this problem. " {
			t.Errorf("expected first event thinking %q, got %q", "I am thinking through this problem. ", first.Message.Thinking)
		}

		if first.Message.Content != "" {
			t.Errorf("expected first event content to be empty, got %q", first.Message.Content)
		}

		if first.Done {
			t.Error("expected first event to be non-terminal")
		}

		last := events[len(events)-1]
		if last.Message.Thinking != "" {
			t.Errorf("expected final event thinking to be empty, got %q", last.Message.Thinking)
		}

		if last.Message.Content != `{"answer":"42"}` {
			t.Errorf("expected final event content %q, got %q", `{"answer":"42"}`, last.Message.Content)
		}

		if !last.Done {
			t.Error("expected final event to be done")
		}

		if last.DoneReason != "stop" {
			t.Errorf("expected final done reason stop, got %s", last.DoneReason)
		}
	})
}