mirror of
https://github.com/ollama/ollama.git
synced 2025-11-12 07:47:35 +01:00
254 lines
7.1 KiB
Go
254 lines
7.1 KiB
Go
package parsers
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"log/slog"
|
|
"strings"
|
|
"unicode"
|
|
|
|
"github.com/ollama/ollama/api"
|
|
"github.com/ollama/ollama/logutil"
|
|
)
|
|
|
|
// TODO: call the init function
|
|
const (
|
|
CollectingThinkingContent qwenParserState = iota
|
|
CollectingContent
|
|
CollectingToolContent
|
|
ThinkingDoneEatingWhitespace
|
|
ToolCallDoneEatingWhitespace
|
|
)
|
|
|
|
const (
|
|
thinkingCloseTag = "</think>"
|
|
)
|
|
|
|
type Qwen3VLParser struct {
|
|
state qwenParserState
|
|
buffer strings.Builder
|
|
tools []api.Tool
|
|
hasThinkingSupport bool
|
|
}
|
|
|
|
func (p *Qwen3VLParser) HasToolSupport() bool {
|
|
return true
|
|
}
|
|
|
|
func (p *Qwen3VLParser) HasThinkingSupport() bool {
|
|
return p.hasThinkingSupport
|
|
}
|
|
|
|
func (p *Qwen3VLParser) setInitialState(lastMessage *api.Message) {
|
|
prefill := lastMessage != nil && lastMessage.Role == "assistant"
|
|
if !p.HasThinkingSupport() {
|
|
p.state = CollectingContent
|
|
return
|
|
}
|
|
|
|
if prefill && lastMessage.Content != "" {
|
|
p.state = CollectingContent
|
|
return
|
|
}
|
|
|
|
p.state = CollectingThinkingContent
|
|
}
|
|
|
|
func (p *Qwen3VLParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
|
|
p.tools = tools
|
|
p.setInitialState(lastMessage)
|
|
return tools
|
|
}
|
|
|
|
type qwenEventThinkingContent struct {
|
|
content string
|
|
}
|
|
|
|
func (qwenEventThinkingContent) isQwenEvent() {}
|
|
|
|
func (p *Qwen3VLParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
|
|
p.buffer.WriteString(s)
|
|
events := p.parseEvents()
|
|
|
|
var toolCalls []api.ToolCall
|
|
var contentSb strings.Builder
|
|
var thinkingSb strings.Builder
|
|
for _, event := range events {
|
|
switch event := event.(type) {
|
|
case qwenEventRawToolCall:
|
|
toolCall, err := parseJSONToolCall(event, p.tools)
|
|
if err != nil {
|
|
slog.Warn("qwen tool call parsing failed", "error", err)
|
|
return "", "", nil, err
|
|
}
|
|
toolCalls = append(toolCalls, toolCall)
|
|
case qwenEventThinkingContent:
|
|
thinkingSb.WriteString(event.content)
|
|
case qwenEventContent:
|
|
// TODO(drifkin): if the same turn contains multiple interleaved content
|
|
// events, we naively append them together here.
|
|
contentSb.WriteString(event.content)
|
|
}
|
|
}
|
|
|
|
return contentSb.String(), thinkingSb.String(), toolCalls, nil
|
|
}
|
|
|
|
func (p *Qwen3VLParser) parseEvents() []qwenEvent {
|
|
var all []qwenEvent
|
|
|
|
keepLooping := true
|
|
for keepLooping {
|
|
var events []qwenEvent
|
|
events, keepLooping = p.eat()
|
|
if len(events) > 0 {
|
|
all = append(all, events...)
|
|
}
|
|
}
|
|
|
|
if len(all) > 0 {
|
|
slog.Log(context.TODO(), logutil.LevelTrace, "qwen events parsed", "events", all, "state", p.state, "buffer", p.buffer.String())
|
|
}
|
|
|
|
return all
|
|
}
|
|
|
|
func splitAtTag(p *Qwen3VLParser, tag string, trimAfter bool) (string, string) {
|
|
split := strings.SplitN(p.buffer.String(), tag, 2)
|
|
before := split[0]
|
|
before = strings.TrimRightFunc(before, unicode.IsSpace)
|
|
after := split[1]
|
|
if trimAfter {
|
|
after = strings.TrimLeftFunc(after, unicode.IsSpace)
|
|
}
|
|
p.buffer.Reset()
|
|
p.buffer.WriteString(after)
|
|
return before, after // return events
|
|
}
|
|
|
|
func (p *Qwen3VLParser) eatLeadingWhitespaceAndTransitionTo(nextState qwenParserState) ([]qwenEvent, bool) {
|
|
trimmed := strings.TrimLeftFunc(p.buffer.String(), unicode.IsSpace)
|
|
p.buffer.Reset()
|
|
if trimmed == "" {
|
|
return nil, false
|
|
}
|
|
p.state = nextState
|
|
p.buffer.WriteString(trimmed)
|
|
return nil, true
|
|
}
|
|
|
|
func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
|
|
var events []qwenEvent
|
|
|
|
switch p.state {
|
|
case CollectingContent:
|
|
if strings.Contains(p.buffer.String(), toolOpenTag) {
|
|
// events = emitContentBeforeTag(p, events, toolOpenTag)
|
|
before, _ := splitAtTag(p, toolOpenTag, false)
|
|
if len(before) > 0 {
|
|
events = append(events, qwenEventContent{content: before})
|
|
}
|
|
p.state = CollectingToolContent
|
|
return events, true
|
|
} else if overlapLen := overlap(p.buffer.String(), toolOpenTag); overlapLen > 0 {
|
|
beforePartialTag := p.buffer.String()[:len(p.buffer.String())-overlapLen]
|
|
trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
|
|
ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen
|
|
|
|
unambiguous := p.buffer.String()[:ambiguousStart]
|
|
ambiguous := p.buffer.String()[ambiguousStart:]
|
|
p.buffer.Reset()
|
|
p.buffer.WriteString(ambiguous)
|
|
if len(unambiguous) > 0 {
|
|
events = append(events, qwenEventContent{content: unambiguous})
|
|
}
|
|
return events, false
|
|
} else {
|
|
whitespaceLen := trailingWhitespaceLen(p.buffer.String())
|
|
ambiguousStart := len(p.buffer.String()) - whitespaceLen
|
|
|
|
unambiguous := p.buffer.String()[:ambiguousStart]
|
|
ambiguous := p.buffer.String()[ambiguousStart:]
|
|
p.buffer.Reset()
|
|
p.buffer.WriteString(ambiguous)
|
|
if len(unambiguous) > 0 {
|
|
events = append(events, qwenEventContent{content: unambiguous})
|
|
}
|
|
return events, false
|
|
}
|
|
case CollectingToolContent:
|
|
if strings.Contains(p.buffer.String(), toolCloseTag) {
|
|
split := strings.SplitN(p.buffer.String(), toolCloseTag, 2)
|
|
before := split[0] // do we also need to do it to tool calls?
|
|
if len(before) == 0 {
|
|
slog.Warn("qwen tool call closing tag found but no content before it")
|
|
}
|
|
|
|
after := split[1]
|
|
events = append(events, qwenEventRawToolCall{raw: before})
|
|
p.buffer.Reset()
|
|
p.buffer.WriteString(after)
|
|
p.state = ToolCallDoneEatingWhitespace
|
|
return events, true
|
|
} else {
|
|
return events, false
|
|
}
|
|
case CollectingThinkingContent:
|
|
if strings.Contains(p.buffer.String(), thinkingCloseTag) {
|
|
thinking, remaining := splitAtTag(p, thinkingCloseTag, true)
|
|
if len(thinking) > 0 {
|
|
events = append(events, qwenEventThinkingContent{content: thinking})
|
|
}
|
|
if remaining == "" {
|
|
p.state = ThinkingDoneEatingWhitespace
|
|
} else {
|
|
p.state = CollectingContent
|
|
}
|
|
return events, true
|
|
} else if overlapLen := overlap(p.buffer.String(), thinkingCloseTag); overlapLen > 0 {
|
|
beforePartialTag := p.buffer.String()[:len(p.buffer.String())-overlapLen]
|
|
trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
|
|
ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen
|
|
|
|
unambiguous := p.buffer.String()[:ambiguousStart]
|
|
ambiguous := p.buffer.String()[ambiguousStart:]
|
|
p.buffer.Reset()
|
|
p.buffer.WriteString(ambiguous)
|
|
if len(unambiguous) > 0 {
|
|
events = append(events, qwenEventThinkingContent{content: unambiguous})
|
|
}
|
|
return events, false
|
|
} else {
|
|
whitespaceLen := trailingWhitespaceLen(p.buffer.String())
|
|
ambiguousStart := len(p.buffer.String()) - whitespaceLen
|
|
|
|
unambiguous := p.buffer.String()[:ambiguousStart]
|
|
ambiguous := p.buffer.String()[ambiguousStart:]
|
|
p.buffer.Reset()
|
|
p.buffer.WriteString(ambiguous)
|
|
if len(unambiguous) > 0 {
|
|
events = append(events, qwenEventThinkingContent{content: unambiguous})
|
|
}
|
|
return events, false
|
|
}
|
|
case ThinkingDoneEatingWhitespace:
|
|
return p.eatLeadingWhitespaceAndTransitionTo(CollectingContent)
|
|
case ToolCallDoneEatingWhitespace:
|
|
return p.eatLeadingWhitespaceAndTransitionTo(CollectingContent)
|
|
default:
|
|
panic("unreachable")
|
|
}
|
|
}
|
|
|
|
func parseJSONToolCall(raw qwenEventRawToolCall, tools []api.Tool) (api.ToolCall, error) {
|
|
var toolCallFunction api.ToolCallFunction
|
|
if err := json.Unmarshal([]byte(raw.raw), &toolCallFunction); err != nil {
|
|
return api.ToolCall{}, err
|
|
}
|
|
|
|
toolCall := api.ToolCall{}
|
|
toolCall.Function = toolCallFunction
|
|
|
|
return toolCall, nil
|
|
}
|