mirror of
https://github.com/ollama/ollama.git
synced 2025-11-10 15:27:20 +01:00
Qwen3VL Cloud Parser and Renderer (#12526)
* working (other than tool calls being in the incorrect order) for tool calls and tools * Tests work, other than image tags (tests do not go through server) and tools (not in the correct order, but contents are the same) * testing for qwen3vl parser - toolparser is working * made changes to JSON tool parser, wraps the ToolCallFunction with a ToolCall object * Working parser for thinking models - assumes state of thinking, emits unambiguous content in thinking, does not call tool call in thinking * changed the parser to start with collecting content * thinking prefill * add hasThinkingSupport parameter to parser * qwen3-vl -> qwen3-vl-instruct for renderer/parser * Add hasThinkingSupport=false to Qwen3VLParser --------- Co-authored-by: Devon Rifkin <drifkin@drifkin.net>
This commit is contained in:
@@ -266,9 +266,9 @@ func (pt PropertyType) String() string {
|
||||
|
||||
type ToolProperty struct {
|
||||
AnyOf []ToolProperty `json:"anyOf,omitempty"`
|
||||
Type PropertyType `json:"type"`
|
||||
Type PropertyType `json:"type,omitempty"`
|
||||
Items any `json:"items,omitempty"`
|
||||
Description string `json:"description"`
|
||||
Description string `json:"description,omitempty"`
|
||||
Enum []any `json:"enum,omitempty"`
|
||||
}
|
||||
|
||||
@@ -332,7 +332,7 @@ func (t *ToolFunctionParameters) String() string {
|
||||
|
||||
type ToolFunction struct {
|
||||
Name string `json:"name"`
|
||||
Description string `json:"description"`
|
||||
Description string `json:"description,omitempty"`
|
||||
Parameters ToolFunctionParameters `json:"parameters"`
|
||||
}
|
||||
|
||||
|
||||
@@ -21,6 +21,9 @@ func ParserForName(name string) Parser {
|
||||
case "qwen3-coder":
|
||||
parser := &Qwen3CoderParser{}
|
||||
return parser
|
||||
case "qwen3-vl-instruct":
|
||||
parser := &Qwen3VLParser{hasThinkingSupport: false}
|
||||
return parser
|
||||
case "passthrough":
|
||||
return &PassthroughParser{}
|
||||
case "harmony":
|
||||
|
||||
@@ -150,7 +150,9 @@ func eat(p *Qwen3CoderParser) ([]qwenEvent, bool) {
|
||||
ambiguous := p.acc.String()[ambiguousStart:]
|
||||
p.acc.Reset()
|
||||
p.acc.WriteString(ambiguous)
|
||||
events = append(events, qwenEventContent{content: unambiguous})
|
||||
if len(unambiguous) > 0 {
|
||||
events = append(events, qwenEventContent{content: unambiguous})
|
||||
}
|
||||
return events, false
|
||||
} else {
|
||||
// we found content that is entirely not a tool call. We should withhold
|
||||
|
||||
@@ -103,6 +103,21 @@ func TestQwenParserStreaming(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "unambiguous empty: partial tool open at buffer start",
|
||||
steps: []step{
|
||||
{
|
||||
input: "<tool_ca",
|
||||
wantEvents: []qwenEvent{},
|
||||
},
|
||||
{
|
||||
input: "ll>abc</tool_call>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventRawToolCall{raw: "abc"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "trailing whitespace between tool call and content",
|
||||
steps: []step{
|
||||
|
||||
228
model/parsers/qwen3vl.go
Normal file
228
model/parsers/qwen3vl.go
Normal file
@@ -0,0 +1,228 @@
|
||||
package parsers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/logutil"
|
||||
)
|
||||
|
||||
// TODO: call the init function

// States of the Qwen3VLParser state machine.
//
// NOTE: CollectingThinkingContent is the zero value of qwenParserState, so a
// Qwen3VLParser whose Init method has not been called starts in the thinking
// state even when hasThinkingSupport is false.
const (
	// CollectingThinkingContent treats buffered text as thinking output until
	// thinkingCloseTag is found.
	CollectingThinkingContent qwenParserState = iota
	// CollectingContent treats buffered text as regular content until a tool
	// open tag is found.
	CollectingContent
	// CollectingToolContent accumulates the raw body of a tool call until the
	// tool close tag is found.
	CollectingToolContent
)

const (
	// thinkingCloseTag marks the end of the thinking section in model output.
	thinkingCloseTag = "</think>"
)
|
||||
|
||||
// Qwen3VLParser incrementally splits streamed model output into regular
// content, thinking content, and raw tool calls. Text is accumulated in
// buffer and consumed by eat according to state.
//
// TODO(gguo): add a field for isThinking
type Qwen3VLParser struct {
	// state is the current position in the parsing state machine.
	state qwenParserState
	// buffer holds text that has been received but not yet emitted.
	buffer strings.Builder
	// tools are the tools handed to Init; they are forwarded to
	// parseJSONToolCall.
	tools []api.Tool
	// hasThinkingSupport selects the initial state: thinking content when
	// true, regular content when false.
	hasThinkingSupport bool
}
|
||||
|
||||
// HasToolSupport reports whether the parser can extract tool calls; it is
// always true for Qwen3VLParser.
func (p *Qwen3VLParser) HasToolSupport() bool {
	return true
}
|
||||
|
||||
// HasThinkingSupport reports whether this parser instance was configured to
// expect a thinking section (the hasThinkingSupport field).
//
// TODO(gguo): change this to reference an object's param
func (p *Qwen3VLParser) HasThinkingSupport() bool {
	return p.hasThinkingSupport
}
|
||||
|
||||
func (p *Qwen3VLParser) initialState() qwenParserState {
|
||||
if p.HasThinkingSupport() { // has thinking, start from collecting thinking content
|
||||
return CollectingThinkingContent
|
||||
}
|
||||
return CollectingContent
|
||||
}
|
||||
|
||||
func (p *Qwen3VLParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
|
||||
p.tools = tools
|
||||
p.state = p.initialState()
|
||||
return tools
|
||||
}
|
||||
|
||||
// qwenEventThinkingContent is a parser event carrying a piece of thinking
// text, as opposed to regular content or a raw tool call.
type qwenEventThinkingContent struct {
	content string
}

// isQwenEvent is an empty marker method; presumably it is what makes the type
// satisfy the qwenEvent interface declared elsewhere in the package.
func (qwenEventThinkingContent) isQwenEvent() {}
|
||||
|
||||
func (p *Qwen3VLParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
|
||||
p.buffer.WriteString(s)
|
||||
events := p.parseEvents()
|
||||
|
||||
var toolCalls []api.ToolCall
|
||||
var sb strings.Builder
|
||||
for _, event := range events {
|
||||
switch event := event.(type) {
|
||||
case qwenEventRawToolCall:
|
||||
toolCall, err := parseJSONToolCall(event, p.tools)
|
||||
if err != nil {
|
||||
slog.Warn("qwen tool call parsing failed", "error", err)
|
||||
return "", "", nil, err
|
||||
}
|
||||
toolCalls = append(toolCalls, toolCall)
|
||||
case qwenEventThinkingContent:
|
||||
sb.WriteString(event.content)
|
||||
case qwenEventContent:
|
||||
// TODO(drifkin): if the same turn contains multiple interleaved content
|
||||
// events, we naively append them together here.
|
||||
sb.WriteString(event.content)
|
||||
}
|
||||
}
|
||||
|
||||
return sb.String(), "", toolCalls, nil
|
||||
}
|
||||
|
||||
func (p *Qwen3VLParser) parseEvents() []qwenEvent {
|
||||
var all []qwenEvent
|
||||
|
||||
keepLooping := true
|
||||
for keepLooping {
|
||||
var events []qwenEvent
|
||||
events, keepLooping = p.eat()
|
||||
if len(events) > 0 {
|
||||
all = append(all, events...)
|
||||
}
|
||||
}
|
||||
|
||||
if len(all) > 0 {
|
||||
slog.Log(context.TODO(), logutil.LevelTrace, "qwen events parsed", "events", all, "state", p.state, "buffer", p.buffer.String())
|
||||
}
|
||||
|
||||
return all
|
||||
}
|
||||
|
||||
func emitContentBeforeTag(p *Qwen3VLParser, events []qwenEvent, tag string) []qwenEvent {
|
||||
split := strings.SplitN(p.buffer.String(), tag, 2)
|
||||
before := split[0]
|
||||
before = strings.TrimRightFunc(before, unicode.IsSpace)
|
||||
if len(before) > 0 {
|
||||
events = append(events, qwenEventContent{content: before})
|
||||
}
|
||||
after := split[1]
|
||||
p.buffer.Reset()
|
||||
p.buffer.WriteString(after)
|
||||
return events
|
||||
}
|
||||
|
||||
func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
|
||||
var events []qwenEvent
|
||||
|
||||
switch p.state {
|
||||
case CollectingContent:
|
||||
if strings.Contains(p.buffer.String(), toolOpenTag) {
|
||||
events = emitContentBeforeTag(p, events, toolOpenTag)
|
||||
p.state = CollectingToolContent
|
||||
return events, true
|
||||
} else if overlapLen := overlap(p.buffer.String(), toolOpenTag); overlapLen > 0 {
|
||||
beforePartialTag := p.buffer.String()[:len(p.buffer.String())-overlapLen]
|
||||
trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
|
||||
ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen
|
||||
|
||||
unambiguous := p.buffer.String()[:ambiguousStart]
|
||||
ambiguous := p.buffer.String()[ambiguousStart:]
|
||||
p.buffer.Reset()
|
||||
p.buffer.WriteString(ambiguous)
|
||||
if len(unambiguous) > 0 {
|
||||
events = append(events, qwenEventContent{content: unambiguous})
|
||||
}
|
||||
return events, false
|
||||
} else {
|
||||
whitespaceLen := trailingWhitespaceLen(p.buffer.String())
|
||||
ambiguousStart := len(p.buffer.String()) - whitespaceLen
|
||||
|
||||
unambiguous := p.buffer.String()[:ambiguousStart]
|
||||
ambiguous := p.buffer.String()[ambiguousStart:]
|
||||
p.buffer.Reset()
|
||||
p.buffer.WriteString(ambiguous)
|
||||
if len(unambiguous) > 0 {
|
||||
events = append(events, qwenEventContent{content: unambiguous})
|
||||
}
|
||||
return events, false
|
||||
}
|
||||
case CollectingToolContent:
|
||||
if strings.Contains(p.buffer.String(), toolCloseTag) {
|
||||
split := strings.SplitN(p.buffer.String(), toolCloseTag, 2)
|
||||
before := split[0]
|
||||
if len(before) == 0 {
|
||||
slog.Warn("qwen tool call closing tag found but no content before it")
|
||||
}
|
||||
|
||||
after := strings.TrimLeftFunc(split[1], unicode.IsSpace)
|
||||
events = append(events, qwenEventRawToolCall{raw: before})
|
||||
p.buffer.Reset()
|
||||
p.buffer.WriteString(after)
|
||||
p.state = CollectingContent
|
||||
return events, true
|
||||
} else {
|
||||
return events, false
|
||||
}
|
||||
case CollectingThinkingContent: // so we want to hip the unambiguous stuff
|
||||
if strings.Contains(p.buffer.String(), thinkingCloseTag) {
|
||||
split := strings.SplitN(p.buffer.String(), thinkingCloseTag, 2)
|
||||
before := split[0]
|
||||
if len(before) == 0 {
|
||||
slog.Warn("qwen tool call closing tag found but no content before it")
|
||||
}
|
||||
after := strings.TrimLeftFunc(split[1], unicode.IsSpace)
|
||||
if len(before) > 0 {
|
||||
events = append(events, qwenEventThinkingContent{content: before})
|
||||
}
|
||||
p.buffer.Reset()
|
||||
p.buffer.WriteString(after)
|
||||
p.state = CollectingContent
|
||||
return events, true
|
||||
} else if overlapLen := overlap(p.buffer.String(), thinkingCloseTag); overlapLen > 0 { // we see part of a close thinking tag
|
||||
beforePartialTag := p.buffer.String()[:len(p.buffer.String())-overlapLen]
|
||||
trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
|
||||
ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen
|
||||
|
||||
unambiguous := p.buffer.String()[:ambiguousStart]
|
||||
ambiguous := p.buffer.String()[ambiguousStart:]
|
||||
p.buffer.Reset()
|
||||
p.buffer.WriteString(ambiguous)
|
||||
if len(unambiguous) > 0 {
|
||||
events = append(events, qwenEventThinkingContent{content: unambiguous})
|
||||
}
|
||||
return events, false
|
||||
} else {
|
||||
whitespaceLen := trailingWhitespaceLen(p.buffer.String())
|
||||
ambiguousStart := len(p.buffer.String()) - whitespaceLen
|
||||
|
||||
unambiguous := p.buffer.String()[:ambiguousStart]
|
||||
ambiguous := p.buffer.String()[ambiguousStart:]
|
||||
p.buffer.Reset()
|
||||
p.buffer.WriteString(ambiguous)
|
||||
if len(unambiguous) > 0 {
|
||||
events = append(events, qwenEventThinkingContent{content: unambiguous})
|
||||
}
|
||||
return events, false
|
||||
}
|
||||
default:
|
||||
panic("unreachable")
|
||||
}
|
||||
}
|
||||
|
||||
func parseJSONToolCall(raw qwenEventRawToolCall, tools []api.Tool) (api.ToolCall, error) {
|
||||
var toolCallFunction api.ToolCallFunction
|
||||
if err := json.Unmarshal([]byte(raw.raw), &toolCallFunction); err != nil {
|
||||
return api.ToolCall{}, err
|
||||
}
|
||||
|
||||
toolCall := api.ToolCall{}
|
||||
toolCall.Function = toolCallFunction
|
||||
|
||||
return toolCall, nil
|
||||
}
|
||||
655
model/parsers/qwen3vl_nonthinking_test.go
Normal file
655
model/parsers/qwen3vl_nonthinking_test.go
Normal file
@@ -0,0 +1,655 @@
|
||||
package parsers
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
func TestQwen3VLNonThinkingParserStreaming(t *testing.T) {
|
||||
type step struct {
|
||||
input string
|
||||
wantEvents []qwenEvent
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
desc string
|
||||
steps []step
|
||||
only bool
|
||||
}{
|
||||
{
|
||||
desc: "simple thinking",
|
||||
steps: []step{
|
||||
{input: "abc</think>", wantEvents: []qwenEvent{qwenEventContent{content: "abc</think>"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "simple trip thinking",
|
||||
steps: []step{
|
||||
{input: "<think>abc</think>", wantEvents: []qwenEvent{qwenEventContent{content: "<think>abc</think>"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking with split tags",
|
||||
steps: []step{
|
||||
{input: "abc", wantEvents: []qwenEvent{qwenEventContent{content: "abc"}}},
|
||||
{input: "</think>", wantEvents: []qwenEvent{qwenEventContent{content: "</think>"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "multiple think tags",
|
||||
steps: []step{
|
||||
{input: "abc<think>actually, is not thinking</think>", wantEvents: []qwenEvent{qwenEventContent{content: "abc<think>actually, is not thinking</think>"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking and tool call",
|
||||
steps: []step{
|
||||
{
|
||||
input: "I'm thinking</think><tool_call>I'm tool calling</tool_call>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "I'm thinking</think>"},
|
||||
qwenEventRawToolCall{raw: "I'm tool calling"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "nested thinking (outside thinking, inside thinking)",
|
||||
steps: []step{
|
||||
{
|
||||
input: "I'm thinking<think>I'm nested thinking</think></think>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "I'm thinking<think>I'm nested thinking</think></think>"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "interleaved thinking",
|
||||
steps: []step{
|
||||
{
|
||||
input: "<think>I'm thinking</think>I'm actually content</think>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "<think>I'm thinking</think>I'm actually content</think>"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "nested thinking and tool call (outside thinking, inside tool call)",
|
||||
steps: []step{
|
||||
{
|
||||
input: "I'm thinking<tool_call>I'm nested tool call</tool_call></think>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "I'm thinking"},
|
||||
qwenEventRawToolCall{raw: "I'm nested tool call"},
|
||||
qwenEventContent{content: "</think>"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "nested thinking and tool call (outside tool call, inside thinking)",
|
||||
steps: []step{
|
||||
{
|
||||
input: "<tool_call>I'm nested tool call<think>I'm thinking</think></tool_call>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventRawToolCall{raw: "I'm nested tool call<think>I'm thinking</think>"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "interleaved thinking and tool call",
|
||||
steps: []step{
|
||||
{
|
||||
input: "I'm thinking<tool_call>I'm NOT a nested tool call</think></tool_call><tool_call>I'm nested tool call 2<think></tool_call></think>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "I'm thinking"},
|
||||
qwenEventRawToolCall{raw: "I'm NOT a nested tool call</think>"},
|
||||
qwenEventRawToolCall{raw: "I'm nested tool call 2<think>"},
|
||||
qwenEventContent{content: "</think>"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "emit unambiguous before partial tool open (trailing ws)",
|
||||
steps: []step{
|
||||
{
|
||||
input: "abc\u00a0\n<tool_call",
|
||||
wantEvents: []qwenEvent{qwenEventContent{content: "abc"}},
|
||||
},
|
||||
{
|
||||
input: " fakeout",
|
||||
wantEvents: []qwenEvent{qwenEventContent{content: "\u00a0\n<tool_call fakeout"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "unambiguous empty: partial tool open at buffer start",
|
||||
steps: []step{
|
||||
{
|
||||
input: "<tool_ca",
|
||||
wantEvents: []qwenEvent{},
|
||||
},
|
||||
{
|
||||
input: "ll>abc</tool_call>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventRawToolCall{raw: "abc"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "partial thinking tag fakeout",
|
||||
steps: []step{
|
||||
{
|
||||
input: "abc</think",
|
||||
wantEvents: []qwenEvent{qwenEventContent{content: "abc</think"}},
|
||||
},
|
||||
{
|
||||
input: " fakeout",
|
||||
wantEvents: []qwenEvent{qwenEventContent{content: " fakeout"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "partial thinking incomplete",
|
||||
steps: []step{
|
||||
{
|
||||
input: "abc<think>unfinished<", // when something is ambiguious, we dont emit anything
|
||||
wantEvents: []qwenEvent{qwenEventContent{content: "abc<think>unfinished"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "test with split tool and content",
|
||||
steps: []step{
|
||||
{
|
||||
input: "abc<tool_call>unfinished</", // when something is ambiguious, we dont emit anything
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "abc"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: "tool_call> def",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventRawToolCall{raw: "unfinished"},
|
||||
qwenEventContent{content: "def"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
anyOnlies := false
|
||||
for _, tc := range cases {
|
||||
if tc.only {
|
||||
anyOnlies = true
|
||||
}
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
if anyOnlies && !tc.only {
|
||||
continue
|
||||
}
|
||||
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
parser := Qwen3VLParser{hasThinkingSupport: false}
|
||||
parser.Init([]api.Tool{}, nil)
|
||||
|
||||
for i, step := range tc.steps {
|
||||
parser.buffer.WriteString(step.input)
|
||||
gotEvents := parser.parseEvents()
|
||||
|
||||
if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
|
||||
// avoid deep equal on empty vs. nil slices
|
||||
continue
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(gotEvents, step.wantEvents) {
|
||||
t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwenOldParserStreaming(t *testing.T) {
|
||||
type step struct {
|
||||
input string
|
||||
wantEvents []qwenEvent
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
desc string
|
||||
steps []step
|
||||
only bool
|
||||
}{
|
||||
{
|
||||
desc: "simple message streamed word by word",
|
||||
steps: []step{
|
||||
{
|
||||
input: "hi",
|
||||
wantEvents: []qwenEvent{qwenEventContent{content: "hi"}},
|
||||
},
|
||||
{
|
||||
input: " there",
|
||||
wantEvents: []qwenEvent{qwenEventContent{content: " there"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "content before tool call",
|
||||
steps: []step{
|
||||
{
|
||||
input: "hi there<tool_call>",
|
||||
wantEvents: []qwenEvent{qwenEventContent{content: "hi there"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "multiple tool calls in one message",
|
||||
steps: []step{
|
||||
{
|
||||
input: "before1<tool_call>in tool call</tool_call>after1<tool_call>in tool call 2</tool_call>after2",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "before1"},
|
||||
qwenEventRawToolCall{raw: "in tool call"},
|
||||
qwenEventContent{content: "after1"},
|
||||
qwenEventRawToolCall{raw: "in tool call 2"},
|
||||
qwenEventContent{content: "after2"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "tool calls with split tags",
|
||||
steps: []step{
|
||||
{
|
||||
input: "before<tool",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "before"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: "_call>in tool call</tool",
|
||||
wantEvents: []qwenEvent{},
|
||||
},
|
||||
{
|
||||
input: "_call>af",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventRawToolCall{raw: "in tool call"},
|
||||
qwenEventContent{content: "af"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: "ter",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "ter"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "trailing whitespace between content and tool call",
|
||||
steps: []step{
|
||||
{
|
||||
input: "abc\n<tool_call>def</tool_call>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "abc"},
|
||||
qwenEventRawToolCall{raw: "def"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "trailing whitespace between tool call and content",
|
||||
steps: []step{
|
||||
{
|
||||
input: "<tool_call>abc</tool_call>\ndef",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventRawToolCall{raw: "abc"},
|
||||
qwenEventContent{content: "def"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "empty content before tool call",
|
||||
steps: []step{
|
||||
{
|
||||
input: "\n<tool_call>abc</tool_call>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventRawToolCall{raw: "abc"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "partial tool open tag fakeout",
|
||||
steps: []step{
|
||||
{
|
||||
input: "abc\n<tool_call",
|
||||
wantEvents: []qwenEvent{
|
||||
// \n should not be emitted yet because `<tool_call` might be a tool
|
||||
// open tag, in which case the whitespace should be trimmed
|
||||
qwenEventContent{content: "abc"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: " fakeout",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "\n<tool_call fakeout"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "token-by-token whitespace handling",
|
||||
steps: []step{
|
||||
{
|
||||
input: "a",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "a"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: "\n",
|
||||
wantEvents: []qwenEvent{},
|
||||
},
|
||||
{
|
||||
input: "b",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "\nb"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "unicode content",
|
||||
steps: []step{
|
||||
{
|
||||
input: "你好 🌍<tool_call>test</tool_call>مرحبا",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "你好 🌍"},
|
||||
qwenEventRawToolCall{raw: "test"},
|
||||
qwenEventContent{content: "مرحبا"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "arabic text handling",
|
||||
steps: []step{
|
||||
{
|
||||
input: "مرحبا بالعالم",
|
||||
wantEvents: []qwenEvent{qwenEventContent{content: "مرحبا بالعالم"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "emoji passthrough",
|
||||
steps: []step{
|
||||
{
|
||||
input: "✅",
|
||||
wantEvents: []qwenEvent{qwenEventContent{content: "✅"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "emoji after tool call",
|
||||
steps: []step{
|
||||
{
|
||||
input: "<tool_call>test</tool_call>完成 ✅",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventRawToolCall{raw: "test"},
|
||||
qwenEventContent{content: "完成 ✅"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "unicode streaming with whitespace handling",
|
||||
steps: []step{
|
||||
{
|
||||
input: "مرحبا",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "مرحبا"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: " \n",
|
||||
wantEvents: []qwenEvent{},
|
||||
},
|
||||
{
|
||||
input: "世界",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: " \n世界"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "non-breaking space withheld across chunks",
|
||||
steps: []step{
|
||||
{
|
||||
input: "Hello\u00a0",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "Hello"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: "world",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "\u00a0world"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "ideographic space before partial tool",
|
||||
steps: []step{
|
||||
{
|
||||
input: "Hello\u3000<tool",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "Hello"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: "_call>abc",
|
||||
wantEvents: []qwenEvent{},
|
||||
},
|
||||
{
|
||||
input: "</tool_call>def",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventRawToolCall{raw: "abc"},
|
||||
qwenEventContent{content: "def"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "ideographic space before partial tool fakeout",
|
||||
steps: []step{
|
||||
{
|
||||
input: "Hello\u3000<tool",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "Hello"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: "fakeout>abc",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "\u3000<toolfakeout>abc"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "unicode with partial tool tag",
|
||||
steps: []step{
|
||||
{
|
||||
input: "测试🎯 <to",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "测试🎯"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
anyOnlies := false
|
||||
for _, tc := range cases {
|
||||
if tc.only {
|
||||
anyOnlies = true
|
||||
}
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
if anyOnlies && !tc.only {
|
||||
continue
|
||||
}
|
||||
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
parser := Qwen3VLParser{hasThinkingSupport: false}
|
||||
parser.Init([]api.Tool{}, nil)
|
||||
|
||||
for i, step := range tc.steps {
|
||||
parser.buffer.WriteString(step.input)
|
||||
gotEvents := parser.parseEvents()
|
||||
|
||||
if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
|
||||
// avoid deep equal on empty vs. nil slices
|
||||
continue
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(gotEvents, step.wantEvents) {
|
||||
t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen3VLNonThinkingToolParser(t *testing.T) {
|
||||
type step struct {
|
||||
name string
|
||||
rawToolCall string
|
||||
tools []api.Tool
|
||||
wantToolCall api.ToolCall
|
||||
}
|
||||
|
||||
steps := []step{
|
||||
{
|
||||
name: "simple tool call",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "get-current-weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "get-current-weather",
|
||||
Arguments: map[string]any{
|
||||
"location": "San Francisco, CA",
|
||||
"unit": "fahrenheit",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "names with spaces",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "get current temperature", "arguments": {"location with spaces": "San Francisco", "unit with spaces": "celsius"}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "get current temperature",
|
||||
Arguments: map[string]any{
|
||||
"location with spaces": "San Francisco",
|
||||
"unit with spaces": "celsius",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "names with quotes",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "\"get current temperature\"", "arguments": {"\"location with spaces\"": "San Francisco", "\"unit with spaces\"": "\"celsius\""}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "\"get current temperature\"",
|
||||
Arguments: map[string]any{
|
||||
"\"location with spaces\"": "San Francisco",
|
||||
"\"unit with spaces\"": "\"celsius\"",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tool call with typed parameters (json types)",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "calculate", "arguments": {"x": 3.14, "y": 42, "enabled": true, "items": ["a", "b", "c"]}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "calculate",
|
||||
Arguments: map[string]any{
|
||||
"x": 3.14,
|
||||
"y": float64(42),
|
||||
"enabled": true,
|
||||
"items": []any{"a", "b", "c"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "ampersands in parameter values",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "exec", "arguments": {"command": "ls && echo \"done\""}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "exec",
|
||||
Arguments: map[string]any{
|
||||
"command": "ls && echo \"done\"",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "angle brackets in parameter values",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "exec", "arguments": {"command": "ls && echo \"a > b and a < b\""}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "exec",
|
||||
Arguments: map[string]any{
|
||||
"command": "ls && echo \"a > b and a < b\"",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "unicode in function names and parameters",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "获取天气", "arguments": {"城市": "北京", "message": "Hello! 你好! 🌟 مرحبا"}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "获取天气",
|
||||
Arguments: map[string]any{
|
||||
"城市": "北京",
|
||||
"message": "Hello! 你好! 🌟 مرحبا",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for i, step := range steps {
|
||||
gotToolCall, err := parseJSONToolCall(qwenEventRawToolCall{raw: step.rawToolCall}, step.tools)
|
||||
if err != nil {
|
||||
t.Errorf("step %d (%s): %v", i, step.name, err)
|
||||
}
|
||||
if !reflect.DeepEqual(gotToolCall, step.wantToolCall) {
|
||||
t.Errorf("step %d (%s): got tool call %#v, want %#v", i, step.name, gotToolCall, step.wantToolCall)
|
||||
}
|
||||
}
|
||||
}
|
||||
346
model/parsers/qwen3vl_thinking_test.go
Normal file
346
model/parsers/qwen3vl_thinking_test.go
Normal file
@@ -0,0 +1,346 @@
|
||||
package parsers
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
func TestQwen3VLThinkingParserStreaming(t *testing.T) {
|
||||
type step struct {
|
||||
input string
|
||||
wantEvents []qwenEvent
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
desc string
|
||||
steps []step
|
||||
only bool
|
||||
}{
|
||||
{
|
||||
desc: "simple thinking",
|
||||
steps: []step{
|
||||
{input: "abc</think>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "simple trip thinking",
|
||||
steps: []step{
|
||||
{input: "<think>abc</think>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "<think>abc"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking with split tags",
|
||||
steps: []step{
|
||||
{input: "abc", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
|
||||
{input: "</think>", wantEvents: []qwenEvent{}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "multiple think tags",
|
||||
steps: []step{
|
||||
{input: "abc<think>actually, is not thinking</think>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc<think>actually, is not thinking"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking and tool call",
|
||||
steps: []step{
|
||||
{
|
||||
input: "I'm thinking</think><tool_call>I'm tool calling</tool_call>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "I'm thinking"},
|
||||
qwenEventRawToolCall{raw: "I'm tool calling"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking and content",
|
||||
steps: []step{
|
||||
{
|
||||
input: "I'm thinking</think>I'm content",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "I'm thinking"},
|
||||
qwenEventContent{content: "I'm content"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking and tool call and content",
|
||||
},
|
||||
{
|
||||
desc: "nested thinking (outside thinking, inside thinking)",
|
||||
steps: []step{
|
||||
{
|
||||
input: "I'm thinking<think>I'm nested thinking</think></think>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "I'm thinking<think>I'm nested thinking"},
|
||||
qwenEventContent{content: "</think>"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "interleaved thinking",
|
||||
steps: []step{
|
||||
{
|
||||
input: "<think>I'm thinking</think>I'm actually content</think>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "<think>I'm thinking"},
|
||||
qwenEventContent{content: "I'm actually content</think>"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "nested thinking and tool call (outside thinking, inside tool call)",
|
||||
steps: []step{
|
||||
{
|
||||
input: "I'm thinking<tool_call>I'm nested tool call</tool_call></think>",
|
||||
wantEvents: []qwenEvent{qwenEventThinkingContent{content: "I'm thinking<tool_call>I'm nested tool call</tool_call>"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "nested thinking and tool call (outside tool call, inside thinking)",
|
||||
steps: []step{
|
||||
{
|
||||
input: "<tool_call>I'm nested tool call<think>I'm thinking</think></tool_call>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "<tool_call>I'm nested tool call<think>I'm thinking"},
|
||||
qwenEventContent{content: "</tool_call>"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "interleaved thinking and tool call",
|
||||
steps: []step{
|
||||
{
|
||||
input: "I'm thinking<tool_call>I'm NOT a nested tool call</think></tool_call><tool_call>I'm nested tool call 2<think></tool_call></think>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "I'm thinking<tool_call>I'm NOT a nested tool call"},
|
||||
qwenEventContent{content: "</tool_call>"},
|
||||
qwenEventRawToolCall{raw: "I'm nested tool call 2<think>"},
|
||||
qwenEventContent{content: "</think>"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "partial thinking tag fakeout",
|
||||
steps: []step{
|
||||
{
|
||||
input: "abc</think",
|
||||
wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}},
|
||||
},
|
||||
{
|
||||
input: " fakeout",
|
||||
wantEvents: []qwenEvent{qwenEventThinkingContent{content: "</think fakeout"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "partial thinking incomplete",
|
||||
steps: []step{
|
||||
{
|
||||
input: "abc<think>unfinished</think", // when something is ambiguious, we dont emit anything
|
||||
wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc<think>unfinished"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "test with split thinking and content",
|
||||
steps: []step{
|
||||
{
|
||||
input: "abc<think>unfinished</th", // when something is ambiguious, we dont emit anything
|
||||
wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc<think>unfinished"}},
|
||||
},
|
||||
{
|
||||
input: "ink> def",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "def"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking with no tags",
|
||||
steps: []step{
|
||||
{
|
||||
input: "Hello I am thinking",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "Hello I am thinking"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: "Hello I am thinking some more",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "Hello I am thinking some more"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: "Hello I am think</think> NOT",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "Hello I am think"},
|
||||
qwenEventContent{content: "NOT"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
anyOnlies := false
|
||||
for _, tc := range cases {
|
||||
if tc.only {
|
||||
anyOnlies = true
|
||||
}
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
if anyOnlies && !tc.only {
|
||||
continue
|
||||
}
|
||||
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
parser := Qwen3VLParser{hasThinkingSupport: true}
|
||||
parser.Init([]api.Tool{}, nil)
|
||||
// parser.state = CollectingThinkingContent
|
||||
|
||||
for i, step := range tc.steps {
|
||||
parser.buffer.WriteString(step.input)
|
||||
gotEvents := parser.parseEvents()
|
||||
|
||||
if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
|
||||
// avoid deep equal on empty vs. nil slices
|
||||
continue
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(gotEvents, step.wantEvents) {
|
||||
t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen3VLThinkingToolParser(t *testing.T) {
|
||||
type step struct {
|
||||
name string
|
||||
rawToolCall string
|
||||
tools []api.Tool
|
||||
wantToolCall api.ToolCall
|
||||
}
|
||||
|
||||
steps := []step{
|
||||
{
|
||||
name: "simple tool call",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "get-current-weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "get-current-weather",
|
||||
Arguments: map[string]any{
|
||||
"location": "San Francisco, CA",
|
||||
"unit": "fahrenheit",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "names with spaces",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "get current temperature", "arguments": {"location with spaces": "San Francisco", "unit with spaces": "celsius"}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "get current temperature",
|
||||
Arguments: map[string]any{
|
||||
"location with spaces": "San Francisco",
|
||||
"unit with spaces": "celsius",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "names with quotes",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "\"get current temperature\"", "arguments": {"\"location with spaces\"": "San Francisco", "\"unit with spaces\"": "\"celsius\""}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "\"get current temperature\"",
|
||||
Arguments: map[string]any{
|
||||
"\"location with spaces\"": "San Francisco",
|
||||
"\"unit with spaces\"": "\"celsius\"",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tool call with typed parameters (json types)",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "calculate", "arguments": {"x": 3.14, "y": 42, "enabled": true, "items": ["a", "b", "c"]}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "calculate",
|
||||
Arguments: map[string]any{
|
||||
"x": 3.14,
|
||||
"y": float64(42),
|
||||
"enabled": true,
|
||||
"items": []any{"a", "b", "c"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "ampersands in parameter values",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "exec", "arguments": {"command": "ls && echo \"done\""}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "exec",
|
||||
Arguments: map[string]any{
|
||||
"command": "ls && echo \"done\"",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "angle brackets in parameter values",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "exec", "arguments": {"command": "ls && echo \"a > b and a < b\""}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "exec",
|
||||
Arguments: map[string]any{
|
||||
"command": "ls && echo \"a > b and a < b\"",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "unicode in function names and parameters",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "获取天气", "arguments": {"城市": "北京", "message": "Hello! 你好! 🌟 مرحبا"}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "获取天气",
|
||||
Arguments: map[string]any{
|
||||
"城市": "北京",
|
||||
"message": "Hello! 你好! 🌟 مرحبا",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for i, step := range steps {
|
||||
gotToolCall, err := parseJSONToolCall(qwenEventRawToolCall{raw: step.rawToolCall}, step.tools)
|
||||
if err != nil {
|
||||
t.Errorf("step %d (%s): %v", i, step.name, err)
|
||||
}
|
||||
if !reflect.DeepEqual(gotToolCall, step.wantToolCall) {
|
||||
t.Errorf("step %d (%s): got tool call %#v, want %#v", i, step.name, gotToolCall, step.wantToolCall)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -55,7 +55,9 @@ func renderAdditionalKeys(obj any, handledKeys map[string]bool) string {
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
func Qwen3CoderRenderer(messages []api.Message, tools []api.Tool, _ *api.ThinkValue) (string, error) {
|
||||
type Qwen3CoderRenderer struct{}
|
||||
|
||||
func (r *Qwen3CoderRenderer) Render(messages []api.Message, tools []api.Tool, _ *api.ThinkValue) (string, error) {
|
||||
var sb strings.Builder
|
||||
|
||||
// filter out system messages and choose the first (if any) to win
|
||||
|
||||
@@ -288,7 +288,7 @@ call tool<|im_end|>
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
rendered, err := Qwen3CoderRenderer(tt.msgs, tt.tools, nil)
|
||||
rendered, err := (&Qwen3CoderRenderer{}).Render(tt.msgs, tt.tools, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
166
model/renderers/qwen3vl.go
Normal file
166
model/renderers/qwen3vl.go
Normal file
@@ -0,0 +1,166 @@
|
||||
package renderers
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
// marshalWithSpaces encodes v with encoding/json and then inserts a single
// space after every ':' and ',' that appears outside a JSON string literal,
// producing output such as {"a": 1, "b": [2, 3]}.
//
// Bytes inside string literals, including escaped quotes and backslashes,
// are copied through unchanged. Any error from json.Marshal is returned
// as-is with a nil slice.
func marshalWithSpaces(v any) ([]byte, error) {
	raw, err := json.Marshal(v)
	if err != nil {
		return nil, err
	}

	// Leave a little headroom for the inserted spaces.
	spaced := make([]byte, 0, len(raw)+len(raw)/8)

	var inString, escaped bool
	for _, ch := range raw {
		// Every input byte is emitted; the switch only tracks string state
		// and decides whether a space follows.
		spaced = append(spaced, ch)

		switch {
		case !inString:
			switch ch {
			case '"':
				inString = true
			case ':', ',':
				spaced = append(spaced, ' ')
			}
		case escaped:
			// The byte after a backslash never terminates the string.
			escaped = false
		case ch == '\\':
			escaped = true
		case ch == '"':
			inString = false
		}
	}
	return spaced, nil
}
|
||||
|
||||
// Qwen3VLRenderer renders chat messages and tool definitions into a
// Qwen3-VL prompt string.
type Qwen3VLRenderer struct {
	// isThinking selects the thinking variant of the prompt: assistant turns
	// may carry a <think> section and the generation prefill opens one.
	isThinking bool
}
|
||||
|
||||
func (r *Qwen3VLRenderer) renderContent(content api.Message, doVisionCount bool) string {
|
||||
// This assumes all images are at the front of the message - same assumption as ollama/ollama/runner.go
|
||||
var subSb strings.Builder
|
||||
for range content.Images {
|
||||
subSb.WriteString("<|vision_start|><|image_pad|><|vision_end|>")
|
||||
}
|
||||
// TODO: support videos
|
||||
|
||||
subSb.WriteString(content.Content)
|
||||
return subSb.String()
|
||||
}
|
||||
|
||||
func (r *Qwen3VLRenderer) Render(messages []api.Message, tools []api.Tool, _ *api.ThinkValue) (string, error) {
|
||||
var sb strings.Builder
|
||||
|
||||
if len(tools) > 0 {
|
||||
sb.WriteString(imStartTag + "system\n")
|
||||
if len(messages) > 0 && messages[0].Role == "system" {
|
||||
sb.WriteString(messages[0].Content + "\n\n")
|
||||
}
|
||||
sb.WriteString("# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>")
|
||||
for _, tool := range tools {
|
||||
sb.WriteString("\n")
|
||||
if b, err := marshalWithSpaces(tool); err == nil {
|
||||
sb.Write(b)
|
||||
}
|
||||
}
|
||||
sb.WriteString("\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n")
|
||||
} else if len(messages) > 0 && messages[0].Role == "system" {
|
||||
sb.WriteString("<|im_start|>system\n" + messages[0].Content + "<|im_end|>\n")
|
||||
}
|
||||
multiStepTool := true
|
||||
lastQueryIndex := len(messages) - 1 // so this is the last user message
|
||||
|
||||
for i := len(messages) - 1; i >= 0; i-- {
|
||||
message := messages[i]
|
||||
if multiStepTool && message.Role == "user" {
|
||||
// Check if content starts with <tool_response> and ends with </tool_response>
|
||||
content := r.renderContent(message, true)
|
||||
if !(strings.HasPrefix(content, "<tool_response>") && strings.HasSuffix(content, "</tool_response>")) {
|
||||
multiStepTool = false
|
||||
lastQueryIndex = i
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for i, message := range messages {
|
||||
content := r.renderContent(message, true)
|
||||
|
||||
lastMessage := i == len(messages)-1
|
||||
prefill := lastMessage && message.Role == "assistant"
|
||||
|
||||
if message.Role == "user" || message.Role == "system" && i != 0 {
|
||||
sb.WriteString("<|im_start|>" + message.Role + "\n" + content + "<|im_end|>\n")
|
||||
} else if message.Role == "assistant" {
|
||||
contentReasoning := ""
|
||||
|
||||
if r.isThinking {
|
||||
if message.Thinking != "" {
|
||||
contentReasoning = message.Thinking
|
||||
}
|
||||
}
|
||||
|
||||
if r.isThinking && i > lastQueryIndex {
|
||||
if i == len(messages)-1 || contentReasoning != "" {
|
||||
sb.WriteString("<|im_start|>" + message.Role + "\n<think>\n" + strings.Trim(contentReasoning, "\n")) // do we want to add a new line here?
|
||||
if content != "" {
|
||||
sb.WriteString("\n</think>\n\n" + strings.TrimLeft(content, "\n"))
|
||||
}
|
||||
} else {
|
||||
sb.WriteString("<|im_start|>" + message.Role + "\n" + content)
|
||||
}
|
||||
} else {
|
||||
sb.WriteString("<|im_start|>" + message.Role + "\n" + content)
|
||||
}
|
||||
|
||||
if len(message.ToolCalls) > 0 {
|
||||
for j, toolCall := range message.ToolCalls {
|
||||
if j > 0 || content != "" {
|
||||
sb.WriteString("\n")
|
||||
}
|
||||
|
||||
sb.WriteString("<tool_call>\n{\"name\": \"" + toolCall.Function.Name + "\", \"arguments\": ")
|
||||
if b, err := marshalWithSpaces(toolCall.Function.Arguments); err == nil {
|
||||
sb.Write(b)
|
||||
}
|
||||
sb.WriteString("}\n</tool_call>")
|
||||
}
|
||||
}
|
||||
|
||||
if !prefill {
|
||||
sb.WriteString("<|im_end|>\n")
|
||||
}
|
||||
} else if message.Role == "tool" {
|
||||
if i == 0 || messages[i-1].Role != "tool" {
|
||||
sb.WriteString("<|im_start|>user")
|
||||
}
|
||||
sb.WriteString("\n<tool_response>\n" + message.Content + "\n</tool_response>")
|
||||
if i == len(messages)-1 || messages[i+1].Role != "tool" {
|
||||
sb.WriteString("<|im_end|>\n")
|
||||
}
|
||||
}
|
||||
|
||||
// prefill at the end
|
||||
if lastMessage && !prefill {
|
||||
sb.WriteString("<|im_start|>assistant\n")
|
||||
if r.isThinking {
|
||||
sb.WriteString("<think>\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return sb.String(), nil
|
||||
}
|
||||
497
model/renderers/qwen3vl_nonthinking_test.go
Normal file
497
model/renderers/qwen3vl_nonthinking_test.go
Normal file
@@ -0,0 +1,497 @@
|
||||
package renderers
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
func TestQwen3VLNonThinkingRenderer(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
msgs []api.Message
|
||||
images []api.ImageData
|
||||
tools []api.Tool
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "prefill",
|
||||
msgs: []api.Message{
|
||||
{Role: "system", Content: "You are a helpful assistant."},
|
||||
{Role: "user", Content: "Tell me something interesting."},
|
||||
{Role: "assistant", Content: "I'll tell you something interesting about cats"},
|
||||
},
|
||||
expected: `<|im_start|>system
|
||||
You are a helpful assistant.<|im_end|>
|
||||
<|im_start|>user
|
||||
Tell me something interesting.<|im_end|>
|
||||
<|im_start|>assistant
|
||||
I'll tell you something interesting about cats`,
|
||||
},
|
||||
{
|
||||
name: "basic",
|
||||
msgs: []api.Message{
|
||||
{Role: "system", Content: "You are a helpful assistant."},
|
||||
{Role: "user", Content: "Hello, how are you?"},
|
||||
},
|
||||
expected: `<|im_start|>system
|
||||
You are a helpful assistant.<|im_end|>
|
||||
<|im_start|>user
|
||||
Hello, how are you?<|im_end|>
|
||||
<|im_start|>assistant
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "With thinking, end assistant.",
|
||||
msgs: []api.Message{
|
||||
{Role: "user", Content: "Tell me a story in two sentences."},
|
||||
{Role: "assistant", Content: "abc<think>To make this story interesting, I will speak in poetry.</think>"}, // does the thinking even work?
|
||||
},
|
||||
expected: `<|im_start|>user
|
||||
Tell me a story in two sentences.<|im_end|>
|
||||
<|im_start|>assistant
|
||||
abc<think>To make this story interesting, I will speak in poetry.</think>`,
|
||||
},
|
||||
{
|
||||
name: "Multiple thinking",
|
||||
msgs: []api.Message{
|
||||
{Role: "user", Content: "Tell me a story in two sentences."},
|
||||
{Role: "assistant", Content: "abc<think>To make this story interesting, I will speak in poetry.</think><think>And I will speak in poetry after the first sentence.</think>"},
|
||||
},
|
||||
expected: `<|im_start|>user
|
||||
Tell me a story in two sentences.<|im_end|>
|
||||
<|im_start|>assistant
|
||||
abc<think>To make this story interesting, I will speak in poetry.</think><think>And I will speak in poetry after the first sentence.</think>`, // NOTE: the second thinking tag is not captured
|
||||
},
|
||||
{
|
||||
name: "Multiple thinking, multiple messages.",
|
||||
msgs: []api.Message{
|
||||
{Role: "user", Content: "Tell me a story in two sentences."},
|
||||
{Role: "assistant", Content: "abc<think>To make this story interesting, I will speak in poetry.</think><think>And I will speak in poetry after the first sentence.</think>"},
|
||||
{Role: "user", Content: "What is the weather like in San Francisco? <think>I will check the weather in San Francisco for you.</think>"},
|
||||
{Role: "assistant", Content: "I'll check the weather in San Francisco for you.<think>Speak poetry after the first sentence.</think><think>Speak poetry after the second sentence.</think>"},
|
||||
},
|
||||
expected: `<|im_start|>user
|
||||
Tell me a story in two sentences.<|im_end|>
|
||||
<|im_start|>assistant
|
||||
abc<think>To make this story interesting, I will speak in poetry.</think><think>And I will speak in poetry after the first sentence.</think><|im_end|>
|
||||
<|im_start|>user
|
||||
What is the weather like in San Francisco? <think>I will check the weather in San Francisco for you.</think><|im_end|>
|
||||
<|im_start|>assistant
|
||||
I'll check the weather in San Francisco for you.<think>Speak poetry after the first sentence.</think><think>Speak poetry after the second sentence.</think>`,
|
||||
},
|
||||
{
|
||||
name: "Image",
|
||||
msgs: []api.Message{
|
||||
{Role: "user", Content: "Describe this image.", Images: []api.ImageData{api.ImageData("img2")}},
|
||||
{Role: "assistant", Content: "Let me analyze this image."},
|
||||
},
|
||||
expected: `<|im_start|>user
|
||||
<|vision_start|><|image_pad|><|vision_end|>Describe this image.<|im_end|>
|
||||
<|im_start|>assistant
|
||||
Let me analyze this image.`,
|
||||
},
|
||||
{
|
||||
name: "Multiple images",
|
||||
msgs: []api.Message{
|
||||
{Role: "user", Content: "Describe these images.", Images: []api.ImageData{api.ImageData("img1"), api.ImageData("img2")}},
|
||||
},
|
||||
expected: `<|im_start|>user
|
||||
<|vision_start|><|image_pad|><|vision_end|><|vision_start|><|image_pad|><|vision_end|>Describe these images.<|im_end|>
|
||||
<|im_start|>assistant
|
||||
`,
|
||||
},
|
||||
|
||||
// // NOTE: solved with #12518: https://github.com/ollama/ollama/compare/main...drifkin/stable-tool-args
|
||||
// {
|
||||
// name: "with tools and response",
|
||||
// msgs: []api.Message{
|
||||
// {Role: "system", Content: "You are a helpful assistant with access to tools."},
|
||||
// {Role: "user", Content: "What's the weather like in New York?"},
|
||||
// {
|
||||
// Role: "assistant",
|
||||
// Content: "I'll check the weather in New York for you.",
|
||||
// ToolCalls: []api.ToolCall{
|
||||
// {
|
||||
// Function: api.ToolCallFunction{
|
||||
// Name: "get-current-weather",
|
||||
// Arguments: map[string]any{
|
||||
// "location": "New York",
|
||||
// "unit": "fahrenheit",
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// {Role: "tool", Content: "80", ToolName: "get-current-weather"},
|
||||
// {Role: "user", Content: "That sounds nice! What about San Francisco?"},
|
||||
// },
|
||||
// tools: []api.Tool{
|
||||
// {
|
||||
// Type: "function",
|
||||
// Function: api.ToolFunction{
|
||||
// Name: "get-current-weather",
|
||||
// Description: "Get the current weather for a location",
|
||||
// Parameters: api.ToolFunctionParameters{
|
||||
// Type: "object",
|
||||
// Required: []string{"location"},
|
||||
// Properties: map[string]api.ToolProperty{
|
||||
// "location": {
|
||||
// Type: api.PropertyType{"string"},
|
||||
// Description: "The city and state, e.g. San Francisco, CA",
|
||||
// },
|
||||
// "unit": {
|
||||
// Type: api.PropertyType{"string"},
|
||||
// Enum: []any{"celsius", "fahrenheit"},
|
||||
// Description: "The temperature unit",
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// expected: `<|im_start|>system
|
||||
// You are a helpful assistant with access to tools.
|
||||
|
||||
// # Tools
|
||||
|
||||
// You may call one or more functions to assist with the user query.
|
||||
|
||||
// You are provided with function signatures within <tools></tools> XML tags:
|
||||
// <tools>
|
||||
// {"type": "function", "function": {"name": "get-current-weather", "description": "Get the current weather for a location", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The temperature unit"}}, "required": ["location"]}}}
|
||||
// </tools>
|
||||
|
||||
// For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
|
||||
// <tool_call>
|
||||
// {"name": <function-name>, "arguments": <args-json-object>}
|
||||
// </tool_call><|im_end|>
|
||||
// <|im_start|>user
|
||||
// What's the weather like in New York?<|im_end|>
|
||||
// <|im_start|>assistant
|
||||
// I'll check the weather in New York for you.
|
||||
// <tool_call>
|
||||
// {"name": "get-current-weather", "arguments": {"location": "New York", "unit": "fahrenheit"}}
|
||||
// </tool_call><|im_end|>
|
||||
// <|im_start|>user
|
||||
// <tool_response>
|
||||
// 80
|
||||
// </tool_response><|im_end|>
|
||||
// <|im_start|>user
|
||||
// That sounds nice! What about San Francisco?<|im_end|>
|
||||
// <|im_start|>assistant
|
||||
// `,
|
||||
// },
|
||||
// // NOTE: solved with #12518: https://github.com/ollama/ollama/compare/main...drifkin/stable-tool-args
|
||||
// {
|
||||
// name: "With tools and response, multiple tool calls",
|
||||
// msgs: []api.Message{
|
||||
// {
|
||||
// Role: "system",
|
||||
// Content: "You are a helpful assistant with access to tools.",
|
||||
// },
|
||||
// {
|
||||
// Role: "user",
|
||||
// Content: "Call two tools for me: add and multiply.",
|
||||
// },
|
||||
// {
|
||||
// Role: "assistant",
|
||||
// Content: "Sure, I'll call both tools for you.",
|
||||
// ToolCalls: []api.ToolCall{
|
||||
// {
|
||||
// Function: api.ToolCallFunction{
|
||||
// Name: "add",
|
||||
// Arguments: map[string]any{
|
||||
// "a": 2,
|
||||
// "b": 3,
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Function: api.ToolCallFunction{
|
||||
// Name: "multiply",
|
||||
// Arguments: map[string]any{
|
||||
// "x": 4,
|
||||
// "y": 5,
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Role: "tool",
|
||||
// Content: "5",
|
||||
// ToolName: "add",
|
||||
// },
|
||||
// {
|
||||
// Role: "tool",
|
||||
// Content: "20",
|
||||
// ToolName: "multiply",
|
||||
// },
|
||||
// {
|
||||
// Role: "user",
|
||||
// Content: "Thanks! What are the results?",
|
||||
// },
|
||||
// },
|
||||
// tools: []api.Tool{
|
||||
// {
|
||||
// Type: "function",
|
||||
// Function: api.ToolFunction{
|
||||
// Name: "add",
|
||||
// Description: "Add two numbers",
|
||||
// Parameters: api.ToolFunctionParameters{
|
||||
// Type: "object",
|
||||
// Required: []string{"a", "b"},
|
||||
// Properties: map[string]api.ToolProperty{
|
||||
// "a": {Type: api.PropertyType{"integer"}, Description: "First number"},
|
||||
// "b": {Type: api.PropertyType{"integer"}, Description: "Second number"},
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Type: "function",
|
||||
// Function: api.ToolFunction{
|
||||
// Name: "multiply",
|
||||
// Description: "Multiply two numbers",
|
||||
// Parameters: api.ToolFunctionParameters{
|
||||
// Type: "object",
|
||||
// Required: []string{"x", "y"},
|
||||
// Properties: map[string]api.ToolProperty{
|
||||
// "x": {Type: api.PropertyType{"integer"}, Description: "First factor"},
|
||||
// "y": {Type: api.PropertyType{"integer"}, Description: "Second factor"},
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// expected: `<|im_start|>system
|
||||
// You are a helpful assistant with access to tools.
|
||||
|
||||
// # Tools
|
||||
|
||||
// You may call one or more functions to assist with the user query.
|
||||
|
||||
// You are provided with function signatures within <tools></tools> XML tags:
|
||||
// <tools>
|
||||
// {"type": "function", "function": {"name": "add", "description": "Add two numbers", "parameters": {"type": "object", "properties": {"a": {"type": "integer"}, "b": {"type": "integer"}}, "required": ["a", "b"]}}}
|
||||
// {"type": "function", "function": {"name": "multiply", "description": "Multiply two numbers", "parameters": {"type": "object", "properties": {"x": {"description": "First factor"}, "y": {"description": "Second factor"}}, "required": ["x", "y"]}}}
|
||||
// </tools>
|
||||
|
||||
// For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
|
||||
// <tool_call>
|
||||
// {"name": <function-name>, "arguments": <args-json-object>}
|
||||
// </tool_call><|im_end|>
|
||||
// <|im_start|>user
|
||||
// Call two tools for me: add and multiply.<|im_end|>
|
||||
// <|im_start|>assistant
|
||||
// Sure, I'll call both tools for you.
|
||||
// <tool_call>
|
||||
// {"name": "add", "arguments": {"a": 2, "b": 3}}
|
||||
// </tool_call>
|
||||
// <tool_call>
|
||||
// {"name": "multiply", "arguments": {"x": 4, "y": 5}}
|
||||
// </tool_call><|im_end|>
|
||||
// <|im_start|>user
|
||||
// <tool_response>
|
||||
// 5
|
||||
// </tool_response>
|
||||
// <tool_response>
|
||||
// 20
|
||||
// </tool_response><|im_end|>
|
||||
// <|im_start|>user
|
||||
// Thanks! What are the results?<|im_end|>
|
||||
// <|im_start|>assistant
|
||||
// `,
|
||||
// },
|
||||
{
|
||||
name: "user tool_response block preserved",
|
||||
msgs: []api.Message{
|
||||
{Role: "user", Content: "What's the weather?"},
|
||||
{
|
||||
Role: "assistant",
|
||||
Content: "I'll check.",
|
||||
ToolCalls: []api.ToolCall{
|
||||
{Function: api.ToolCallFunction{Name: "get-current-weather", Arguments: map[string]any{"location": "Paris", "unit": "celsius"}}},
|
||||
},
|
||||
},
|
||||
{Role: "user", Content: "<tool_response>\n18\n</tool_response>"},
|
||||
{Role: "user", Content: "Thanks!"},
|
||||
},
|
||||
expected: `<|im_start|>user
|
||||
What's the weather?<|im_end|>
|
||||
<|im_start|>assistant
|
||||
I'll check.
|
||||
<tool_call>
|
||||
{"name": "get-current-weather", "arguments": {"location": "Paris", "unit": "celsius"}}
|
||||
</tool_call><|im_end|>
|
||||
<|im_start|>user
|
||||
<tool_response>
|
||||
18
|
||||
</tool_response><|im_end|>
|
||||
<|im_start|>user
|
||||
Thanks!<|im_end|>
|
||||
<|im_start|>assistant
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "assistant with multiple tool calls and content",
|
||||
msgs: []api.Message{
|
||||
{Role: "user", Content: "Hi"},
|
||||
{
|
||||
Role: "assistant",
|
||||
Content: "before",
|
||||
ToolCalls: []api.ToolCall{
|
||||
{Function: api.ToolCallFunction{Name: "add", Arguments: map[string]any{"a": 2, "b": 3}}},
|
||||
{Function: api.ToolCallFunction{Name: "mul", Arguments: map[string]any{"x": 4, "y": 5}}},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: `<|im_start|>user
|
||||
Hi<|im_end|>
|
||||
<|im_start|>assistant
|
||||
before
|
||||
<tool_call>
|
||||
{"name": "add", "arguments": {"a": 2, "b": 3}}
|
||||
</tool_call>
|
||||
<tool_call>
|
||||
{"name": "mul", "arguments": {"x": 4, "y": 5}}
|
||||
</tool_call>`,
|
||||
},
|
||||
{
|
||||
name: "consecutive tool responses grouped",
|
||||
msgs: []api.Message{
|
||||
{Role: "user", Content: "Compute results"},
|
||||
{Role: "assistant", Content: "ok", ToolCalls: []api.ToolCall{{Function: api.ToolCallFunction{Name: "job", Arguments: map[string]any{"n": 1}}}}},
|
||||
{Role: "tool", Content: "5", ToolName: "job"},
|
||||
{Role: "tool", Content: "6", ToolName: "job"},
|
||||
},
|
||||
expected: `<|im_start|>user
|
||||
Compute results<|im_end|>
|
||||
<|im_start|>assistant
|
||||
ok
|
||||
<tool_call>
|
||||
{"name": "job", "arguments": {"n": 1}}
|
||||
</tool_call><|im_end|>
|
||||
<|im_start|>user
|
||||
<tool_response>
|
||||
5
|
||||
</tool_response>
|
||||
<tool_response>
|
||||
6
|
||||
</tool_response><|im_end|>
|
||||
<|im_start|>assistant
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "last message is tool then prefill",
|
||||
msgs: []api.Message{
|
||||
{Role: "user", Content: "run"},
|
||||
{Role: "assistant", Content: "ok", ToolCalls: []api.ToolCall{{Function: api.ToolCallFunction{Name: "exec", Arguments: map[string]any{"cmd": "ls"}}}}},
|
||||
{Role: "tool", Content: "done", ToolName: "exec"},
|
||||
},
|
||||
expected: `<|im_start|>user
|
||||
run<|im_end|>
|
||||
<|im_start|>assistant
|
||||
ok
|
||||
<tool_call>
|
||||
{"name": "exec", "arguments": {"cmd": "ls"}}
|
||||
</tool_call><|im_end|>
|
||||
<|im_start|>user
|
||||
<tool_response>
|
||||
done
|
||||
</tool_response><|im_end|>
|
||||
<|im_start|>assistant
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "user with multiple images",
|
||||
msgs: []api.Message{
|
||||
{Role: "user", Content: "Describe.", Images: []api.ImageData{api.ImageData("img1"), api.ImageData("img2")}},
|
||||
},
|
||||
expected: `<|im_start|>user
|
||||
<|vision_start|><|image_pad|><|vision_end|><|vision_start|><|image_pad|><|vision_end|>Describe.<|im_end|>
|
||||
<|im_start|>assistant
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "user tool_response, no whitespace",
|
||||
msgs: []api.Message{
|
||||
{Role: "user", Content: "What's the weather?"},
|
||||
{
|
||||
Role: "assistant",
|
||||
Content: "I'll check.",
|
||||
ToolCalls: []api.ToolCall{
|
||||
{Function: api.ToolCallFunction{Name: "get-current-weather", Arguments: map[string]any{"location": "Paris", "unit": "celsius"}}},
|
||||
},
|
||||
},
|
||||
{Role: "user", Content: "<tool_response>\n18\n</tool_response>"},
|
||||
{Role: "user", Content: "Thanks!"},
|
||||
},
|
||||
expected: `<|im_start|>user
|
||||
What's the weather?<|im_end|>
|
||||
<|im_start|>assistant
|
||||
I'll check.
|
||||
<tool_call>
|
||||
{"name": "get-current-weather", "arguments": {"location": "Paris", "unit": "celsius"}}
|
||||
</tool_call><|im_end|>
|
||||
<|im_start|>user
|
||||
<tool_response>
|
||||
18
|
||||
</tool_response><|im_end|>
|
||||
<|im_start|>user
|
||||
Thanks!<|im_end|>
|
||||
<|im_start|>assistant
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "user tool_response with surrounding whitespace",
|
||||
msgs: []api.Message{
|
||||
{Role: "user", Content: "What's the weather?"},
|
||||
{
|
||||
Role: "assistant",
|
||||
Content: "I'll check.",
|
||||
ToolCalls: []api.ToolCall{
|
||||
{Function: api.ToolCallFunction{Name: "get-current-weather", Arguments: map[string]any{"location": "Paris", "unit": "celsius"}}},
|
||||
},
|
||||
},
|
||||
{Role: "user", Content: "\n\n\n\n<tool_response>\n18\n</tool_response> extra\n\n\n\n\n\n"},
|
||||
},
|
||||
expected: `<|im_start|>user
|
||||
What's the weather?<|im_end|>
|
||||
<|im_start|>assistant
|
||||
I'll check.
|
||||
<tool_call>
|
||||
{"name": "get-current-weather", "arguments": {"location": "Paris", "unit": "celsius"}}
|
||||
</tool_call><|im_end|>
|
||||
<|im_start|>user
|
||||
|
||||
|
||||
|
||||
|
||||
<tool_response>
|
||||
18
|
||||
</tool_response> extra
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<|im_end|>
|
||||
<|im_start|>assistant
|
||||
`,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
rendered, err := (&Qwen3VLRenderer{false}).Render(tt.msgs, tt.tools, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if diff := cmp.Diff(rendered, tt.expected); diff != "" {
|
||||
t.Errorf("mismatch (-got +want):\n%s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
346
model/renderers/qwen3vl_test.go
Normal file
346
model/renderers/qwen3vl_test.go
Normal file
@@ -0,0 +1,346 @@
|
||||
package renderers
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
)
|
||||
|
||||
// TODO(drifkin): this will be moved to utils in the near future and used by other renderers as well
|
||||
func TestMarshalWithSpaces(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input any
|
||||
expected string
|
||||
}{
|
||||
// basic formatting tests
|
||||
{
|
||||
name: "simple object",
|
||||
input: map[string]any{"key": "value"},
|
||||
expected: `{"key": "value"}`,
|
||||
},
|
||||
{
|
||||
name: "simple array",
|
||||
input: []any{"a", "b", "c"},
|
||||
expected: `["a", "b", "c"]`,
|
||||
},
|
||||
// escaped quotes
|
||||
{
|
||||
name: "escaped quote in string",
|
||||
input: map[string]any{"text": `quote"inside`},
|
||||
expected: `{"text": "quote\"inside"}`,
|
||||
},
|
||||
{
|
||||
name: "multiple escaped quotes",
|
||||
input: map[string]any{"text": `say "hello" and "goodbye"`},
|
||||
expected: `{"text": "say \"hello\" and \"goodbye\""}`,
|
||||
},
|
||||
// escaped backslashes
|
||||
{
|
||||
name: "escaped backslash",
|
||||
input: map[string]any{"path": `C:\windows\system32`},
|
||||
expected: `{"path": "C:\\windows\\system32"}`,
|
||||
},
|
||||
{
|
||||
name: "double backslash",
|
||||
input: map[string]any{"text": `test\\more`},
|
||||
expected: `{"text": "test\\\\more"}`,
|
||||
},
|
||||
{
|
||||
name: "backslash before quote",
|
||||
input: map[string]any{"text": `end with \"`},
|
||||
expected: `{"text": "end with \\\""}`,
|
||||
},
|
||||
// standard JSON escape sequences
|
||||
{
|
||||
name: "newline in string",
|
||||
input: map[string]any{"text": "line1\nline2"},
|
||||
expected: `{"text": "line1\nline2"}`,
|
||||
},
|
||||
{
|
||||
name: "tab in string",
|
||||
input: map[string]any{"text": "before\tafter"},
|
||||
expected: `{"text": "before\tafter"}`,
|
||||
},
|
||||
{
|
||||
name: "carriage return",
|
||||
input: map[string]any{"text": "before\rafter"},
|
||||
expected: `{"text": "before\rafter"}`,
|
||||
},
|
||||
{
|
||||
name: "multiple escape sequences",
|
||||
input: map[string]any{"text": "line1\nline2\ttab\rcarriage"},
|
||||
expected: `{"text": "line1\nline2\ttab\rcarriage"}`,
|
||||
},
|
||||
// strings containing colons and commas (no spaces should be added inside)
|
||||
{
|
||||
name: "colon in string",
|
||||
input: map[string]any{"url": "http://example.com"},
|
||||
expected: `{"url": "http://example.com"}`,
|
||||
},
|
||||
{
|
||||
name: "comma in string",
|
||||
input: map[string]any{"list": "apple, banana, cherry"},
|
||||
expected: `{"list": "apple, banana, cherry"}`,
|
||||
},
|
||||
{
|
||||
name: "colon and comma in string",
|
||||
input: map[string]any{"data": "key:value, key2:value2"},
|
||||
expected: `{"data": "key:value, key2:value2"}`,
|
||||
},
|
||||
// unicode characters
|
||||
{
|
||||
name: "emoji",
|
||||
input: map[string]any{"emoji": "😀🎉✨"},
|
||||
expected: `{"emoji": "😀🎉✨"}`,
|
||||
},
|
||||
{
|
||||
name: "chinese characters",
|
||||
input: map[string]any{"text": "你好世界"},
|
||||
expected: `{"text": "你好世界"}`,
|
||||
},
|
||||
{
|
||||
name: "arabic characters",
|
||||
input: map[string]any{"text": "مرحبا"},
|
||||
expected: `{"text": "مرحبا"}`,
|
||||
},
|
||||
{
|
||||
name: "mixed unicode and ascii",
|
||||
input: map[string]any{"text": "Hello 世界! 😀"},
|
||||
expected: `{"text": "Hello 世界! 😀"}`,
|
||||
},
|
||||
{
|
||||
name: "unicode with special symbols",
|
||||
input: map[string]any{"text": "®©™€£¥"},
|
||||
expected: `{"text": "®©™€£¥"}`,
|
||||
},
|
||||
// complex combinations - strings that look like JSON
|
||||
{
|
||||
name: "json string inside value",
|
||||
input: map[string]any{"nested": `{"key":"value"}`},
|
||||
expected: `{"nested": "{\"key\":\"value\"}"}`,
|
||||
},
|
||||
{
|
||||
name: "json array inside value",
|
||||
input: map[string]any{"array": `["a","b","c"]`},
|
||||
expected: `{"array": "[\"a\",\"b\",\"c\"]"}`,
|
||||
},
|
||||
// edge cases
|
||||
{
|
||||
name: "empty string",
|
||||
input: map[string]any{"empty": ""},
|
||||
expected: `{"empty": ""}`,
|
||||
},
|
||||
{
|
||||
name: "empty object",
|
||||
input: map[string]any{},
|
||||
expected: `{}`,
|
||||
},
|
||||
{
|
||||
name: "empty array",
|
||||
input: []any{},
|
||||
expected: `[]`,
|
||||
},
|
||||
{
|
||||
name: "numbers",
|
||||
input: map[string]any{"int": 42, "float": 3.14},
|
||||
expected: `{"float": 3.14, "int": 42}`,
|
||||
},
|
||||
{
|
||||
name: "boolean",
|
||||
input: map[string]any{"bool": true, "other": false},
|
||||
expected: `{"bool": true, "other": false}`,
|
||||
},
|
||||
{
|
||||
name: "null value",
|
||||
input: map[string]any{"value": nil},
|
||||
expected: `{"value": null}`,
|
||||
},
|
||||
// nested structures with complex strings
|
||||
{
|
||||
name: "nested object with escapes",
|
||||
input: map[string]any{
|
||||
"outer": map[string]any{
|
||||
"path": `C:\folder\file.txt`,
|
||||
"quote": `He said "hi"`,
|
||||
},
|
||||
},
|
||||
expected: `{"outer": {"path": "C:\\folder\\file.txt", "quote": "He said \"hi\""}}`,
|
||||
},
|
||||
{
|
||||
name: "array with unicode and escapes",
|
||||
input: []any{
|
||||
"normal",
|
||||
"with\nnewline",
|
||||
"with\"quote",
|
||||
"emoji😀",
|
||||
"colon:comma,",
|
||||
},
|
||||
expected: `["normal", "with\nnewline", "with\"quote", "emoji😀", "colon:comma,"]`,
|
||||
},
|
||||
{
|
||||
name: "backslash at positions before special chars",
|
||||
input: map[string]any{"text": `a\b:c\d,e`},
|
||||
expected: `{"text": "a\\b:c\\d,e"}`,
|
||||
},
|
||||
{
|
||||
name: "multiple backslashes before quote",
|
||||
input: map[string]any{"text": `ends\\"`},
|
||||
expected: `{"text": "ends\\\\\""}`,
|
||||
},
|
||||
{
|
||||
name: "unicode with escapes",
|
||||
input: map[string]any{"text": "Hello\n世界\t😀"},
|
||||
expected: `{"text": "Hello\n世界\t😀"}`,
|
||||
},
|
||||
|
||||
// Real-world tool call example
|
||||
{
|
||||
name: "tool call arguments",
|
||||
input: map[string]any{
|
||||
"location": "San Francisco, CA",
|
||||
"unit": "fahrenheit",
|
||||
"format": "json",
|
||||
},
|
||||
expected: `{"format": "json", "location": "San Francisco, CA", "unit": "fahrenheit"}`,
|
||||
},
|
||||
{
|
||||
name: "complex tool arguments with escapes",
|
||||
input: map[string]any{
|
||||
"query": `SELECT * FROM "users" WHERE name = 'O'Brien'`,
|
||||
"description": "Fetch user\ndata from DB",
|
||||
"path": `C:\data\users.db`,
|
||||
},
|
||||
expected: `{"description": "Fetch user\ndata from DB", "path": "C:\\data\\users.db", "query": "SELECT * FROM \"users\" WHERE name = 'O'Brien'"}`,
|
||||
},
|
||||
{
|
||||
name: "unicode immediately adjacent to JSON structure chars",
|
||||
input: map[string]any{"😀key": "😀value", "test": "😀:😀,😀"},
|
||||
expected: `{"test": "😀:😀,😀", "😀key": "😀value"}`,
|
||||
},
|
||||
{
|
||||
name: "long unicode string stress test",
|
||||
input: map[string]any{"text": "😀😁😂😃😄😅😆😇😈😉😊😋😌😍😎😏😐😑😒😓😔😕😖😗😘😙😚😛😜😝😞😟"},
|
||||
expected: `{"text": "😀😁😂😃😄😅😆😇😈😉😊😋😌😍😎😏😐😑😒😓😔😕😖😗😘😙😚😛😜😝😞😟"}`,
|
||||
},
|
||||
{
|
||||
name: "deeply nested with unicode everywhere",
|
||||
input: map[string]any{
|
||||
"😀": map[string]any{
|
||||
"你好": []any{"مرحبا", "®©™", "∑∫∂√"},
|
||||
},
|
||||
},
|
||||
expected: `{"😀": {"你好": ["مرحبا", "®©™", "∑∫∂√"]}}`,
|
||||
},
|
||||
{
|
||||
name: "unicode with all JSON special chars interleaved",
|
||||
input: map[string]any{"k😀:k": "v😀,v", "a:😀": "b,😀", "😀": ":,😀,:"},
|
||||
expected: `{"a:😀": "b,😀", "k😀:k": "v😀,v", "😀": ":,😀,:"}`,
|
||||
},
|
||||
{
|
||||
name: "combining diacritics and RTL text",
|
||||
input: map[string]any{"hebrew": "עִבְרִית", "combined": "é̀ñ", "mixed": "test:עִבְרִית,é̀ñ"},
|
||||
expected: `{"combined": "é̀ñ", "hebrew": "עִבְרִית", "mixed": "test:עִבְרִית,é̀ñ"}`,
|
||||
},
|
||||
{
|
||||
name: "pathological case: unicode + escapes + special chars",
|
||||
input: map[string]any{"😀": "test\n😀\"quote😀\\backslash😀:colon😀,comma😀"},
|
||||
expected: `{"😀": "test\n😀\"quote😀\\backslash😀:colon😀,comma😀"}`,
|
||||
},
|
||||
|
||||
// all JSON structural characters inside strings
|
||||
{
|
||||
name: "braces and brackets in strings",
|
||||
input: map[string]any{"text": "test{with}braces[and]brackets"},
|
||||
expected: `{"text": "test{with}braces[and]brackets"}`,
|
||||
},
|
||||
{
|
||||
name: "braces and brackets with colons and commas",
|
||||
input: map[string]any{"code": "{key:value,[1,2,3]}"},
|
||||
expected: `{"code": "{key:value,[1,2,3]}"}`,
|
||||
},
|
||||
{
|
||||
name: "json-like string with all structural chars",
|
||||
input: map[string]any{"schema": `{"type":"object","properties":{"name":{"type":"string"},"items":{"type":"array"}}}`},
|
||||
expected: `{"schema": "{\"type\":\"object\",\"properties\":{\"name\":{\"type\":\"string\"},\"items\":{\"type\":\"array\"}}}"}`,
|
||||
},
|
||||
|
||||
// forward slash tests (JSON allows \/ as an escape sequence)
|
||||
{
|
||||
name: "forward slash in URL",
|
||||
input: map[string]any{"url": "https://example.com/path/to/resource"},
|
||||
expected: `{"url": "https://example.com/path/to/resource"}`,
|
||||
},
|
||||
{
|
||||
name: "regex pattern with slashes",
|
||||
input: map[string]any{"regex": "/[a-z]+/gi"},
|
||||
expected: `{"regex": "/[a-z]+/gi"}`,
|
||||
},
|
||||
|
||||
// all JSON escape sequences
|
||||
{
|
||||
name: "backspace escape",
|
||||
input: map[string]any{"text": "before\bafter"},
|
||||
expected: `{"text": "before\bafter"}`,
|
||||
},
|
||||
{
|
||||
name: "form feed escape",
|
||||
input: map[string]any{"text": "before\fafter"},
|
||||
expected: `{"text": "before\fafter"}`,
|
||||
},
|
||||
{
|
||||
name: "all standard escapes combined",
|
||||
input: map[string]any{"text": "\"\\\b\f\n\r\t"},
|
||||
expected: `{"text": "\"\\\b\f\n\r\t"}`,
|
||||
},
|
||||
|
||||
// unicode escape sequences
|
||||
{
|
||||
name: "string that forces unicode escapes",
|
||||
input: map[string]any{"control": "\u0000\u0001\u001f"},
|
||||
expected: `{"control": "\u0000\u0001\u001f"}`,
|
||||
},
|
||||
|
||||
// empty objects and arrays nested with strings
|
||||
{
|
||||
name: "nested empty structures with string values",
|
||||
input: map[string]any{"empty_obj": map[string]any{}, "empty_arr": []any{}, "text": "{}[]"},
|
||||
expected: `{"empty_arr": [], "empty_obj": {}, "text": "{}[]"}`,
|
||||
},
|
||||
|
||||
// complex nesting with all structural characters
|
||||
{
|
||||
name: "deeply nested with all char types",
|
||||
input: map[string]any{
|
||||
"level1": map[string]any{
|
||||
"array": []any{
|
||||
map[string]any{"nested": "value:with,special{chars}[here]"},
|
||||
[]any{"a", "b", "c"},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: `{"level1": {"array": [{"nested": "value:with,special{chars}[here]"}, ["a", "b", "c"]]}}`,
|
||||
},
|
||||
|
||||
// string containing escaped structural characters
|
||||
{
|
||||
name: "string with multiple escape sequences and structural chars",
|
||||
input: map[string]any{"data": "test\"quote\"{brace}[bracket]:colon,comma\\backslash/slash"},
|
||||
expected: `{"data": "test\"quote\"{brace}[bracket]:colon,comma\\backslash/slash"}`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result, err := marshalWithSpaces(tt.input)
|
||||
if err != nil {
|
||||
t.Fatalf("marshalWithSpaces failed: %v", err)
|
||||
}
|
||||
|
||||
resultStr := string(result)
|
||||
if diff := cmp.Diff(resultStr, tt.expected); diff != "" {
|
||||
t.Errorf("mismatch (-got +want):\n%s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
372
model/renderers/qwen3vl_thinking_test.go
Normal file
372
model/renderers/qwen3vl_thinking_test.go
Normal file
@@ -0,0 +1,372 @@
|
||||
package renderers
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
func TestQwen3VLThinkingRenderer(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
msgs []api.Message
|
||||
images []api.ImageData
|
||||
tools []api.Tool
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "basic",
|
||||
msgs: []api.Message{
|
||||
{Role: "system", Content: "You are a helpful assistant."},
|
||||
{Role: "user", Content: "Hello, how are you?"},
|
||||
},
|
||||
expected: `<|im_start|>system
|
||||
You are a helpful assistant.<|im_end|>
|
||||
<|im_start|>user
|
||||
Hello, how are you?<|im_end|>
|
||||
<|im_start|>assistant
|
||||
<think>
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "With thinking, end assistant.",
|
||||
msgs: []api.Message{
|
||||
{Role: "user", Content: "Tell me a story in two sentences."},
|
||||
{Role: "assistant", Content: "abc", Thinking: "To make this story interesting, I will speak in poetry."},
|
||||
},
|
||||
expected: `<|im_start|>user
|
||||
Tell me a story in two sentences.<|im_end|>
|
||||
<|im_start|>assistant
|
||||
<think>
|
||||
To make this story interesting, I will speak in poetry.
|
||||
</think>
|
||||
|
||||
abc`,
|
||||
},
|
||||
{
|
||||
name: "With thinking, end assistant.",
|
||||
msgs: []api.Message{
|
||||
{Role: "user", Content: "Tell me a story in two sentences."},
|
||||
{Role: "assistant", Thinking: "To make this story interesting, I will speak in poetry."},
|
||||
},
|
||||
expected: `<|im_start|>user
|
||||
Tell me a story in two sentences.<|im_end|>
|
||||
<|im_start|>assistant
|
||||
<think>
|
||||
To make this story interesting, I will speak in poetry.`,
|
||||
},
|
||||
{
|
||||
name: "Multiple thinking",
|
||||
msgs: []api.Message{
|
||||
{Role: "user", Content: "Tell me a story in two sentences."},
|
||||
{Role: "assistant", Content: "abc", Thinking: "To make this story interesting, I will speak in poetry.<think>And I will speak in poetry after the first sentence.</think>"},
|
||||
},
|
||||
expected: `<|im_start|>user
|
||||
Tell me a story in two sentences.<|im_end|>
|
||||
<|im_start|>assistant
|
||||
<think>
|
||||
To make this story interesting, I will speak in poetry.<think>And I will speak in poetry after the first sentence.</think>
|
||||
</think>
|
||||
|
||||
abc`, // NOTE: the second thinking tag is not captured
|
||||
},
|
||||
{
|
||||
name: "Multiple thinking, multiple messages.",
|
||||
msgs: []api.Message{
|
||||
{Role: "user", Content: "Tell me a story in two sentences."},
|
||||
{Role: "assistant", Thinking: "To make this story interesting, I will speak in poetry.", Content: "abc"},
|
||||
{Role: "user", Content: "What is the weather like in San Francisco?"},
|
||||
{Role: "assistant", Thinking: "Speak poetry after the first sentence.</think><think>Speak poetry after the second sentence."},
|
||||
},
|
||||
expected: `<|im_start|>user
|
||||
Tell me a story in two sentences.<|im_end|>
|
||||
<|im_start|>assistant
|
||||
abc<|im_end|>
|
||||
<|im_start|>user
|
||||
What is the weather like in San Francisco?<|im_end|>
|
||||
<|im_start|>assistant
|
||||
<think>
|
||||
Speak poetry after the first sentence.</think><think>Speak poetry after the second sentence.`,
|
||||
},
|
||||
// NOTE: Servers automatically prepend a [img-<n>] tag
|
||||
// {
|
||||
// name: "Image",
|
||||
// msgs: []api.Message{
|
||||
// {Role: "user", Content: "Describe this image.", Images: []api.ImageData{api.ImageData(IMAGE2_BASE64)}},
|
||||
// },
|
||||
// expected: `<|im_start|>user
|
||||
// [img-0]Describe this image.<|im_end|>
|
||||
// <|im_start|>assistant
|
||||
// <think>
|
||||
// `,
|
||||
// },
|
||||
|
||||
// NOTE: Servers automatically prepend a [img-<n>] tag
|
||||
// {
|
||||
// name: "Multiple images",
|
||||
// msgs: []api.Message{
|
||||
// {Role: "user", Content: "Describe these images.", Images: []api.ImageData{api.ImageData(IMAGE1_BASE64), api.ImageData(IMAGE2_BASE64)}},
|
||||
// },
|
||||
// expected: `<|im_start|>user
|
||||
// [img-0][img-1]Describe these images.<|im_end|>
|
||||
// <|im_start|>assistant
|
||||
// <think>
|
||||
// `,
|
||||
// },
|
||||
|
||||
// NOTE: solved with #12518: https://github.com/ollama/ollama/compare/main...drifkin/stable-tool-args
|
||||
// {
|
||||
// name: "with tools and response",
|
||||
// msgs: []api.Message{
|
||||
// {Role: "system", Content: "You are a helpful assistant with access to tools."},
|
||||
// {Role: "user", Content: "What's the weather like in New York?"},
|
||||
// {
|
||||
// Role: "assistant",
|
||||
// Content: "I'll check the weather in New York for you.",
|
||||
// ToolCalls: []api.ToolCall{
|
||||
// {
|
||||
// Function: api.ToolCallFunction{
|
||||
// Name: "get-current-weather",
|
||||
// Arguments: map[string]any{
|
||||
// "location": "New York",
|
||||
// "unit": "fahrenheit",
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// {Role: "tool", Content: "80", ToolName: "get-current-weather"},
|
||||
// {Role: "user", Content: "That sounds nice! What about San Francisco?"},
|
||||
// },
|
||||
// tools: []api.Tool{
|
||||
// {
|
||||
// Type: "function",
|
||||
// Function: api.ToolFunction{
|
||||
// Name: "get-current-weather",
|
||||
// Description: "Get the current weather for a location",
|
||||
// Parameters: api.ToolFunctionParameters{
|
||||
// Type: "object",
|
||||
// Required: []string{"location"},
|
||||
// Properties: map[string]api.ToolProperty{
|
||||
// "location": {
|
||||
// Type: api.PropertyType{"string"},
|
||||
// Description: "The city and state, e.g. San Francisco, CA",
|
||||
// },
|
||||
// "unit": {
|
||||
// Type: api.PropertyType{"string"},
|
||||
// Enum: []any{"celsius", "fahrenheit"},
|
||||
// Description: "The temperature unit",
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// expected: `<|im_start|>system
|
||||
// You are a helpful assistant with access to tools.
|
||||
|
||||
// # Tools
|
||||
|
||||
// You may call one or more functions to assist with the user query.
|
||||
|
||||
// You are provided with function signatures within <tools></tools> XML tags:
|
||||
// <tools>
|
||||
// {"type": "function", "function": {"name": "get-current-weather", "description": "Get the current weather for a location", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The temperature unit"}}, "required": ["location"]}}}
|
||||
// </tools>
|
||||
|
||||
// For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
|
||||
// <tool_call>
|
||||
// {"name": <function-name>, "arguments": <args-json-object>}
|
||||
// </tool_call><|im_end|>
|
||||
// <|im_start|>user
|
||||
// What's the weather like in New York?<|im_end|>
|
||||
// <|im_start|>assistant
|
||||
// I'll check the weather in New York for you.
|
||||
// <tool_call>
|
||||
// {"name": "get-current-weather", "arguments": {"location": "New York", "unit": "fahrenheit"}}
|
||||
// </tool_call><|im_end|>
|
||||
// <|im_start|>user
|
||||
// <tool_response>
|
||||
// 80
|
||||
// </tool_response><|im_end|>
|
||||
// <|im_start|>user
|
||||
// That sounds nice! What about San Francisco?<|im_end|>
|
||||
// <|im_start|>assistant
|
||||
// <think>
|
||||
// `,
|
||||
// },
|
||||
|
||||
// NOTE: solved with #12518: https://github.com/ollama/ollama/compare/main...drifkin/stable-tool-args
|
||||
// {
|
||||
// name: "With tools and response, multiple tool calls",
|
||||
// msgs: []api.Message{
|
||||
// {
|
||||
// Role: "system",
|
||||
// Content: "You are a helpful assistant with access to tools.",
|
||||
// },
|
||||
// {
|
||||
// Role: "user",
|
||||
// Content: "Call two tools for me: add and multiply.",
|
||||
// },
|
||||
// {
|
||||
// Role: "assistant",
|
||||
// Content: "Sure, I'll call both tools for you.",
|
||||
// ToolCalls: []api.ToolCall{
|
||||
// {
|
||||
// Function: api.ToolCallFunction{
|
||||
// Name: "add",
|
||||
// Arguments: map[string]any{
|
||||
// "a": 2,
|
||||
// "b": 3,
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Function: api.ToolCallFunction{
|
||||
// Name: "multiply",
|
||||
// Arguments: map[string]any{
|
||||
// "x": 4,
|
||||
// "y": 5,
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Role: "tool",
|
||||
// Content: "5",
|
||||
// ToolName: "add",
|
||||
// },
|
||||
// {
|
||||
// Role: "tool",
|
||||
// Content: "20",
|
||||
// ToolName: "multiply",
|
||||
// },
|
||||
// {
|
||||
// Role: "user",
|
||||
// Content: "Thanks! What are the results?",
|
||||
// },
|
||||
// },
|
||||
// tools: []api.Tool{
|
||||
// {
|
||||
// Type: "function",
|
||||
// Function: api.ToolFunction{
|
||||
// Name: "add",
|
||||
// Description: "Add two numbers",
|
||||
// Parameters: api.ToolFunctionParameters{
|
||||
// Type: "object",
|
||||
// Required: []string{"a", "b"},
|
||||
// Properties: map[string]api.ToolProperty{
|
||||
// "a": {Type: api.PropertyType{"integer"}, Description: "First number"},
|
||||
// "b": {Type: api.PropertyType{"integer"}, Description: "Second number"},
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Type: "function",
|
||||
// Function: api.ToolFunction{
|
||||
// Name: "multiply",
|
||||
// Description: "Multiply two numbers",
|
||||
// Parameters: api.ToolFunctionParameters{
|
||||
// Type: "object",
|
||||
// Required: []string{"x", "y"},
|
||||
// Properties: map[string]api.ToolProperty{
|
||||
// "x": {Type: api.PropertyType{"integer"}, Description: "First factor"},
|
||||
// "y": {Type: api.PropertyType{"integer"}, Description: "Second factor"},
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// expected: `<|im_start|>system
|
||||
// You are a helpful assistant with access to tools.
|
||||
|
||||
// # Tools
|
||||
|
||||
// You may call one or more functions to assist with the user query.
|
||||
|
||||
// You are provided with function signatures within <tools></tools> XML tags:
|
||||
// <tools>
|
||||
// {"type": "function", "function": {"name": "add", "description": "Add two numbers", "parameters": {"type": "object", "properties": {"a": {"type": "integer"}, "b": {"type": "integer"}}, "required": ["a", "b"]}}}
|
||||
// {"type": "function", "function": {"name": "multiply", "description": "Multiply two numbers", "parameters": {"type": "object", "properties": {"x": {"type": "integer"}, "y": {"type": "integer"}}, "required": ["x", "y"]}}}
|
||||
// </tools>
|
||||
|
||||
// For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
|
||||
// <tool_call>
|
||||
// {"name": <function-name>, "arguments": <args-json-object>}
|
||||
// </tool_call><|im_end|>
|
||||
// <|im_start|>user
|
||||
// Call two tools for me: add and multiply.<|im_end|>
|
||||
// <|im_start|>assistant
|
||||
// Sure, I'll call both tools for you.
|
||||
// <tool_call>
|
||||
// {"name": "add", "arguments": {"a": 2, "b": 3}}
|
||||
// </tool_call>
|
||||
// <tool_call>
|
||||
// {"name": "multiply", "arguments": {"x": 4, "y": 5}}
|
||||
// </tool_call><|im_end|>
|
||||
// <|im_start|>user
|
||||
// <tool_response>
|
||||
// 5
|
||||
// </tool_response>
|
||||
// <tool_response>
|
||||
// 20
|
||||
// </tool_response><|im_end|>
|
||||
// <|im_start|>user
|
||||
// Thanks! What are the results?<|im_end|>
|
||||
// <|im_start|>assistant
|
||||
// <think>
|
||||
// `,
|
||||
// },
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
rendered, err := (&Qwen3VLRenderer{true}).Render(tt.msgs, tt.tools, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if diff := cmp.Diff(rendered, tt.expected); diff != "" {
|
||||
t.Errorf("mismatch (-got +want):\n%s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFormatToolCallArgumentThinkingVL(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
arg any
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "string",
|
||||
arg: "foo",
|
||||
expected: "foo",
|
||||
},
|
||||
{
|
||||
name: "map",
|
||||
arg: map[string]any{"foo": "bar"},
|
||||
expected: "{\"foo\":\"bar\"}",
|
||||
},
|
||||
{
|
||||
name: "number",
|
||||
arg: 1,
|
||||
expected: "1",
|
||||
},
|
||||
{
|
||||
name: "boolean",
|
||||
arg: true,
|
||||
expected: "true",
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := formatToolCallArgument(tt.arg)
|
||||
if got != tt.expected {
|
||||
t.Errorf("formatToolCallArgument(%v) = %v, want %v", tt.arg, got, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,25 +1,19 @@
|
||||
package renderers
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
import "github.com/ollama/ollama/api"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
type rendererFunc func([]api.Message, []api.Tool, *api.ThinkValue) (string, error)
|
||||
|
||||
func RenderWithRenderer(name string, msgs []api.Message, tools []api.Tool, think *api.ThinkValue) (string, error) {
|
||||
renderer := rendererForName(name)
|
||||
if renderer == nil {
|
||||
return "", fmt.Errorf("unknown renderer %q", name)
|
||||
}
|
||||
return renderer(msgs, tools, think)
|
||||
type Renderer interface {
|
||||
Render(messages []api.Message, tools []api.Tool, think *api.ThinkValue) (string, error)
|
||||
}
|
||||
|
||||
func rendererForName(name string) rendererFunc {
|
||||
func RendererForName(name string) Renderer {
|
||||
switch name {
|
||||
case "qwen3-coder":
|
||||
return Qwen3CoderRenderer
|
||||
renderer := &Qwen3CoderRenderer{}
|
||||
return renderer
|
||||
case "qwen3-vl-instruct":
|
||||
renderer := &Qwen3VLRenderer{false}
|
||||
return renderer
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -456,6 +456,11 @@ func FromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
|
||||
|
||||
types := []string{"jpeg", "jpg", "png", "webp"}
|
||||
valid := false
|
||||
// support blank mime type to match api/chat taking just unadorned base64
|
||||
if strings.HasPrefix(url, "data:;base64,") {
|
||||
url = strings.TrimPrefix(url, "data:;base64,")
|
||||
valid = true
|
||||
}
|
||||
for _, t := range types {
|
||||
prefix := "data:image/" + t + ";base64,"
|
||||
if strings.HasPrefix(url, prefix) {
|
||||
|
||||
@@ -106,7 +106,8 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
|
||||
|
||||
func renderPrompt(m *Model, msgs []api.Message, tools []api.Tool, think *api.ThinkValue) (string, error) {
|
||||
if m.Config.Renderer != "" {
|
||||
rendered, err := renderers.RenderWithRenderer(m.Config.Renderer, msgs, tools, think)
|
||||
renderer := renderers.RendererForName(m.Config.Renderer)
|
||||
rendered, err := renderer.Render(msgs, tools, think)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user