diff --git a/api/types.go b/api/types.go index a7ddbc373e..df3504c3b2 100644 --- a/api/types.go +++ b/api/types.go @@ -313,10 +313,11 @@ func (t *ToolFunction) String() string { // ChatResponse is the response returned by [Client.Chat]. Its fields are // similar to [GenerateResponse]. type ChatResponse struct { - Model string `json:"model"` - CreatedAt time.Time `json:"created_at"` - Message Message `json:"message"` - DoneReason string `json:"done_reason,omitempty"` + Model string `json:"model"` + CreatedAt time.Time `json:"created_at"` + Message Message `json:"message"` + DoneReason string `json:"done_reason,omitempty"` + DebugInfo *DebugInfo `json:"_debug_info,omitempty"` Done bool `json:"done"` @@ -329,13 +330,6 @@ type DebugInfo struct { ImageCount int `json:"image_count,omitempty"` } -// DebugTemplateResponse is returned when _debug_render_only is set to true -type DebugTemplateResponse struct { - Model string `json:"model"` - CreatedAt time.Time `json:"created_at"` - DebugInfo DebugInfo `json:"_debug_info"` -} - type Metrics struct { TotalDuration time.Duration `json:"total_duration,omitempty"` LoadDuration time.Duration `json:"load_duration,omitempty"` @@ -443,6 +437,8 @@ type CreateRequest struct { System string `json:"system,omitempty"` Parameters map[string]any `json:"parameters,omitempty"` Messages []Message `json:"messages,omitempty"` + Renderer string `json:"renderer,omitempty"` + Parser string `json:"parser,omitempty"` // Deprecated: set the model name with Model instead Name string `json:"name"` @@ -480,6 +476,8 @@ type ShowResponse struct { Parameters string `json:"parameters,omitempty"` Template string `json:"template,omitempty"` System string `json:"system,omitempty"` + Renderer string `json:"renderer,omitempty"` + Parser string `json:"parser,omitempty"` Details ModelDetails `json:"details,omitempty"` Messages []Message `json:"messages,omitempty"` ModelInfo map[string]any `json:"model_info,omitempty"` @@ -592,6 +590,8 @@ type GenerateResponse 
struct { Metrics ToolCalls []ToolCall `json:"tool_calls,omitempty"` + + DebugInfo *DebugInfo `json:"_debug_info,omitempty"` } // ModelDetails provides details about a model. diff --git a/model/parsers/parsers.go b/model/parsers/parsers.go new file mode 100644 index 0000000000..e6dbd1f4f1 --- /dev/null +++ b/model/parsers/parsers.go @@ -0,0 +1,37 @@ +package parsers + +import ( + "github.com/ollama/ollama/api" +) + +type Parser interface { + Add(s string, tools []api.Tool) (content string, thinking string, calls []api.ToolCall, err error) + HasToolSupport() bool + HasThinkingSupport() bool +} + +func ParserForName(name string) Parser { + switch name { + case "qwen3-coder": + parser := &Qwen3CoderParser{} + return parser + case "passthrough": + return &PassthroughParser{} + default: + return nil + } +} + +type PassthroughParser struct{} + +func (p *PassthroughParser) Add(s string, tools []api.Tool) (content string, thinking string, calls []api.ToolCall, err error) { + return s, "", nil, nil +} + +func (p *PassthroughParser) HasToolSupport() bool { + return false +} + +func (p *PassthroughParser) HasThinkingSupport() bool { + return false +} diff --git a/model/parsers/qwen3coder.go b/model/parsers/qwen3coder.go new file mode 100644 index 0000000000..b0e8ec48cd --- /dev/null +++ b/model/parsers/qwen3coder.go @@ -0,0 +1,410 @@ +package parsers + +import ( + "context" + "encoding/json" + "encoding/xml" + "fmt" + "log/slog" + "math" + "regexp" + "strconv" + "strings" + "unicode" + + "github.com/ollama/ollama/api" + "github.com/ollama/ollama/logutil" +) + +type qwenParserState int + +const ( + toolOpenTag = "" + toolCloseTag = "" +) + +const ( + qwenParserState_LookingForToolStart qwenParserState = iota + qwenParserState_CollectingToolContent +) + +type Qwen3CoderParser struct { + state qwenParserState + acc strings.Builder +} + +func (p *Qwen3CoderParser) HasToolSupport() bool { + return true +} + +func (p *Qwen3CoderParser) HasThinkingSupport() bool { + return false +} + 
+func (p *Qwen3CoderParser) Add(s string, tools []api.Tool) (content string, thinking string, calls []api.ToolCall, err error) { + p.acc.WriteString(s) + + events := p.parseEvents() + + var toolCalls []api.ToolCall + var sb strings.Builder + for _, event := range events { + switch event := event.(type) { + case qwenEventRawToolCall: + toolCall, err := parseToolCall(event, tools) + if err != nil { + slog.Warn("qwen tool call parsing failed", "error", err) + return "", "", nil, err + } + toolCalls = append(toolCalls, toolCall) + case qwenEventContent: + // TODO(drifkin): if the same turn contains multiple interleaved content + // events, we naively append them together here. See the note below about + // `qwenEvent`s for more details + sb.WriteString(event.content) + } + } + + return sb.String(), "", toolCalls, nil +} + +func (p *Qwen3CoderParser) parseEvents() []qwenEvent { + var all []qwenEvent + + keepLooping := true + for keepLooping { + var events []qwenEvent + events, keepLooping = eat(p) + if len(events) > 0 { + all = append(all, events...) + } + } + + if len(all) > 0 { + slog.Log(context.TODO(), logutil.LevelTrace, "qwen events parsed", "events", all, "state", p.state, "acc", p.acc.String()) + } + + return all +} + +// we use some internal event types in order to communicate between `Add` and +// `eat`. We do this to support interleaving content and parallel tool calls in +// the parser, even though qwen3-coder isn't supposed to do this. Our API +// doesn't currently support models outputting multiple messages in a turn, so +// we wouldn't be able to represent it yet, but there's no reason to prevent the +// parser from supporting it, especially for future models if they end up using +// a similar format. 
+type qwenEvent interface { + isQwenEvent() +} + +type qwenEventRawToolCall struct { + raw string +} + +type qwenEventContent struct { + content string +} + +func (qwenEventContent) isQwenEvent() {} +func (qwenEventRawToolCall) isQwenEvent() {} + +// eat consumes the parser's buffer, and returns a list of any unambiguous +// events from the current parser state. If the parser transitions to another +// state, it may have additional events to emit on the next call, which is what +// the second return value indicates +func eat(p *Qwen3CoderParser) ([]qwenEvent, bool) { + var events []qwenEvent + + switch p.state { + case qwenParserState_LookingForToolStart: + if strings.Contains(p.acc.String(), toolOpenTag) { + // we found a full tool open tag, so we can emit the content before the + // tag, being sure to trim any trailing whitespace + split := strings.SplitN(p.acc.String(), toolOpenTag, 2) + before := split[0] + before = strings.TrimRightFunc(before, unicode.IsSpace) + if len(before) > 0 { + events = append(events, qwenEventContent{content: before}) + } + after := split[1] + p.acc.Reset() + p.acc.WriteString(after) + p.state = qwenParserState_CollectingToolContent + return events, true + } else if overlap := overlap(p.acc.String(), toolOpenTag); overlap > 0 { + // we found a partial tool open tag, so we can emit the unambiguous part, + // which is the (trailing-whitespace trimmed) content before the partial + // tool open tag + beforePartialTag := p.acc.String()[:len(p.acc.String())-overlap] + trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag) + ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen + unambiguous := p.acc.String()[:ambiguousStart] + ambiguous := p.acc.String()[ambiguousStart:] + p.acc.Reset() + p.acc.WriteString(ambiguous) + events = append(events, qwenEventContent{content: unambiguous}) + return events, false + } else { + // we found content that is entirely not a tool call. 
We should withhold + // any trailing whitespace in case this is the end of the content + whitespaceLen := trailingWhitespaceLen(p.acc.String()) + ambiguousStart := len(p.acc.String()) - whitespaceLen + unambiguous := p.acc.String()[:ambiguousStart] + ambiguous := p.acc.String()[ambiguousStart:] + p.acc.Reset() + p.acc.WriteString(ambiguous) + if len(unambiguous) > 0 { + events = append(events, qwenEventContent{content: unambiguous}) + } + return events, false + } + case qwenParserState_CollectingToolContent: + if strings.Contains(p.acc.String(), toolCloseTag) { + split := strings.SplitN(p.acc.String(), toolCloseTag, 2) + before := split[0] + if len(before) == 0 { + slog.Warn("qwen tool call closing tag found but no content before it") + } + // remove any whitespace between the tool call and any content after it + after := strings.TrimLeftFunc(split[1], unicode.IsSpace) + p.acc.Reset() + p.acc.WriteString(after) + events = append(events, qwenEventRawToolCall{raw: before}) + p.state = qwenParserState_LookingForToolStart + return events, true + } else { + // note that we don't need to check the overlap here because we only plan + // on parsing the tool call once we see the full closing tag. 
We don't + // stream back the unparsed tool content, so there's no need to be eager + // here + return events, false + } + default: + panic("unreachable") + } +} + +// TODO(drifkin): move this to a shared location +// longest overlap between suffix of s and prefix of delim +func overlap(s, delim string) int { + max := min(len(delim), len(s)) + for i := max; i > 0; i-- { + if strings.HasSuffix(s, delim[:i]) { + return i + } + } + return 0 +} + +func trailingWhitespaceLen(s string) int { + for i := len(s) - 1; i >= 0; i-- { + if !unicode.IsSpace(rune(s[i])) { + return len(s) - i - 1 + } + } + return len(s) +} + +type XMLFunctionCall struct { + XMLName xml.Name `xml:"function"` + Name string `xml:"name,attr"` + Parameters []XMLParameter `xml:"parameter"` +} + +type XMLParameter struct { + Name string `xml:"name,attr"` + Value string `xml:",chardata"` +} + +// parseToolCall parses a raw tool call string into an api.ToolCall. +// The raw string follows an xml-like format, here's an example: +// +// +// +// San Francisco +// +// +// celsius +// +// +func parseToolCall(raw qwenEventRawToolCall, tools []api.Tool) (api.ToolCall, error) { + toolCall := api.ToolCall{} + + xmlString := transformToXML(raw.raw) + + var functionCall XMLFunctionCall + err := xml.Unmarshal([]byte(xmlString), &functionCall) + if err != nil { + return api.ToolCall{}, err + } + + toolCall.Function = api.ToolCallFunction{ + Name: functionCall.Name, + } + + // Find the matching tool to get parameter types + var matchedTool *api.Tool + for i := range tools { + if tools[i].Function.Name == functionCall.Name { + matchedTool = &tools[i] + break + } + } + + toolCall.Function.Arguments = make(api.ToolCallFunctionArguments) + for _, parameter := range functionCall.Parameters { + // Look up the parameter type if we found the tool + var paramType api.PropertyType + if matchedTool != nil && matchedTool.Function.Parameters.Properties != nil { + if prop, ok := 
matchedTool.Function.Parameters.Properties[parameter.Name]; ok { + paramType = prop.Type + } + } + + toolCall.Function.Arguments[parameter.Name] = parseValue(parameter.Value, paramType) + } + + return toolCall, nil +} + +// parseValue converts a raw string value to the appropriate type based on the parameter type specification. +// +// For union types (multiple types in PropertyType, which we support but doesn't +// seem as though the reference parser does type coercion with those types in +// mind) we use a type precedence approach: +// 1. null - checked first regardless of declared types (matches reference implementation) +// 2. boolean - only "true"/"false" are valid booleans +// 3. integer - must parse as a whole number +// 4. number - must parse as numeric (returns int if no decimal part) +// 5. array - must parse as valid JSON array +// 6. object - must parse as valid JSON object +// 7. string - always succeeds (least specific type) +// +// This precedence ensures we return the most specific type that successfully parses, +// following the principle of least surprise. For example, with PropertyType{"string", "number"}, +// "123" becomes 123 (number), while "hello" becomes "hello" (string). +func parseValue(raw string, paramType api.PropertyType) any { + // first remove a single leading newlines, and a single trailing newline (if + // they exist). 
This follows the reference implementation + raw = strings.TrimPrefix(raw, "\n") + raw = strings.TrimSuffix(raw, "\n") + + // Check for null first (case-insensitive) - this takes precedence over any type + if strings.ToLower(raw) == "null" { + return nil + } + + // If no type is specified, default to string + if len(paramType) == 0 { + return raw + } + + // Check if any of the specified types match, using type precedence + // Order: boolean -> integer -> number -> array -> object -> string + typeSet := make(map[string]bool) + for _, t := range paramType { + typeSet[t] = true + } + + // Try boolean first (most restrictive) + if typeSet["boolean"] { + lower := strings.ToLower(raw) + switch lower { + case "true": + return true + case "false": + return false + } + // If not a valid boolean but boolean is the only type, return false (matching reference) + if len(paramType) == 1 { + return false + } + // Otherwise try other types + } + + // Try integer + if typeSet["integer"] { + if i, err := strconv.ParseInt(raw, 10, 64); err == nil { + // Return as int if it fits in int32, otherwise int64 + if i >= math.MinInt32 && i <= math.MaxInt32 { + return int(i) + } + return i + } + // If integer is the only type and parsing failed, fall back to string + if len(paramType) == 1 { + return raw + } + } + + // Try number (float) + if typeSet["number"] { + if f, err := strconv.ParseFloat(raw, 64); err == nil { + // If the number has no decimal part, return as int (matching reference) + if f == math.Trunc(f) { + i := int64(f) + if i >= math.MinInt32 && i <= math.MaxInt32 { + return int(i) + } + return i + } + return f + } + // If number is the only type and parsing failed, fall back to string + if len(paramType) == 1 { + return raw + } + } + + // Try array + if typeSet["array"] { + var arr []interface{} + if err := json.Unmarshal([]byte(raw), &arr); err == nil { + return arr + } + // If array is the only type and parsing failed, fall back to string + if len(paramType) == 1 { + return 
raw + } + } + + // Try object + if typeSet["object"] { + var obj map[string]interface{} + if err := json.Unmarshal([]byte(raw), &obj); err == nil { + return obj + } + // If object is the only type and parsing failed, fall back to string + if len(paramType) == 1 { + return raw + } + } + + // String always succeeds (or if "string" is in the type set) + if typeSet["string"] { + return raw + } + + // If we get here, none of the types matched and string wasn't an option + // We return string as a fallback. The reference implementation will attempt + // to parse the value as a python literal, but we purposefully don't support + // that + return raw +} + +var qwenTagRegex = regexp.MustCompile(`<(\w+)=([^>]+)>`) + +// transformToXML transforms a raw qwen tool call with xml-like tags into valid +// xml so that it can be parsed by any xml parser +func transformToXML(raw string) string { + // take the form `` and transform it to ``, taking + // care to properly escape the string that becomes the attribute value + return qwenTagRegex.ReplaceAllStringFunc(raw, func(match string) string { + groups := qwenTagRegex.FindStringSubmatch(match) + tag := groups[1] + var escapedValue strings.Builder + xml.EscapeText(&escapedValue, []byte(groups[2])) + return fmt.Sprintf(`<%s name="%s">`, tag, escapedValue.String()) + }) +} diff --git a/model/parsers/qwen3coder_test.go b/model/parsers/qwen3coder_test.go new file mode 100644 index 0000000000..2389c77b58 --- /dev/null +++ b/model/parsers/qwen3coder_test.go @@ -0,0 +1,830 @@ +package parsers + +import ( + "reflect" + "testing" + + "github.com/ollama/ollama/api" +) + +// tool creates a test tool with the given name and properties +func tool(name string, props map[string]api.ToolProperty) api.Tool { + t := api.Tool{Type: "function", Function: api.ToolFunction{Name: name}} + t.Function.Parameters.Type = "object" + t.Function.Parameters.Properties = props + return t +} + +func TestQwenParserStreaming(t *testing.T) { + type step struct { + input 
string + wantEvents []qwenEvent + } + + cases := []struct { + desc string + steps []step + only bool + }{ + { + desc: "simple message streamed word by word", + steps: []step{ + { + input: "hi", + wantEvents: []qwenEvent{qwenEventContent{content: "hi"}}, + }, + { + input: " there", + wantEvents: []qwenEvent{qwenEventContent{content: " there"}}, + }, + }, + }, + { + desc: "content before tool call", + steps: []step{ + { + input: "hi there", + wantEvents: []qwenEvent{qwenEventContent{content: "hi there"}}, + }, + }, + }, + { + desc: "multiple tool calls in one message", + steps: []step{ + { + input: "before1in tool callafter1in tool call 2after2", + wantEvents: []qwenEvent{ + qwenEventContent{content: "before1"}, + qwenEventRawToolCall{raw: "in tool call"}, + qwenEventContent{content: "after1"}, + qwenEventRawToolCall{raw: "in tool call 2"}, + qwenEventContent{content: "after2"}, + }, + }, + }, + }, + { + desc: "tool calls with split tags", + steps: []step{ + { + input: "beforein tool callaf", + wantEvents: []qwenEvent{ + qwenEventRawToolCall{raw: "in tool call"}, + qwenEventContent{content: "af"}, + }, + }, + { + input: "ter", + wantEvents: []qwenEvent{ + qwenEventContent{content: "ter"}, + }, + }, + }, + }, + { + desc: "trailing whitespace between content and tool call", + steps: []step{ + { + input: "abc\ndef", + wantEvents: []qwenEvent{ + qwenEventContent{content: "abc"}, + qwenEventRawToolCall{raw: "def"}, + }, + }, + }, + }, + { + desc: "trailing whitespace between tool call and content", + steps: []step{ + { + input: "abc\ndef", + wantEvents: []qwenEvent{ + qwenEventRawToolCall{raw: "abc"}, + qwenEventContent{content: "def"}, + }, + }, + }, + }, + { + desc: "empty content before tool call", + steps: []step{ + { + input: "\nabc", + wantEvents: []qwenEvent{ + qwenEventRawToolCall{raw: "abc"}, + }, + }, + }, + }, + { + desc: "partial tool open tag fakeout", + steps: []step{ + { + input: "abc\n + +San Francisco + + +celsius + +`, + wantToolCall: api.ToolCall{ + 
Function: api.ToolCallFunction{ + Name: "get_current_temperature", + Arguments: map[string]any{ + "location": "San Francisco", + "unit": "celsius", + }, + }, + }, + }, + { + name: "names with spaces", + tools: []api.Tool{}, + rawToolCall: ` + +San Francisco + + +celsius + +`, + wantToolCall: api.ToolCall{ + Function: api.ToolCallFunction{ + Name: "get current temperature", + Arguments: map[string]any{ + "location with spaces": "San Francisco", + "unit with spaces": "celsius", + }, + }, + }, + }, + // this mirrors the reference implementation's behavior, but unclear if it + // ever happens. If so, then we should probably remove them instead, this + // test is to just document the current behavior and test that we don't get + // xml errors + { + name: "names with quotes", + tools: []api.Tool{}, + rawToolCall: ` + +San Francisco + + +"celsius" + +`, + wantToolCall: api.ToolCall{ + Function: api.ToolCallFunction{ + Name: "\"get current temperature\"", + Arguments: map[string]any{ + "\"location with spaces\"": "San Francisco", + "\"unit with spaces\"": "\"celsius\"", + }, + }, + }, + }, + { + name: "tool call with typed parameters", + tools: []api.Tool{ + tool("calculate", map[string]api.ToolProperty{ + "x": {Type: api.PropertyType{"number"}}, + "y": {Type: api.PropertyType{"integer"}}, + "enabled": {Type: api.PropertyType{"boolean"}}, + "items": {Type: api.PropertyType{"array"}}, + }), + }, + rawToolCall: ` + +3.14 + + +42 + + +true + + +["a", "b", "c"] + +`, + wantToolCall: api.ToolCall{ + Function: api.ToolCallFunction{ + Name: "calculate", + Arguments: map[string]any{ + "x": 3.14, + "y": 42, + "enabled": true, + "items": []any{"a", "b", "c"}, + }, + }, + }, + }, + } + + for i, step := range steps { + gotToolCall, err := parseToolCall(qwenEventRawToolCall{raw: step.rawToolCall}, step.tools) + if err != nil { + t.Errorf("step %d (%s): %v", i, step.name, err) + } + if !reflect.DeepEqual(gotToolCall, step.wantToolCall) { + t.Errorf("step %d (%s): got tool call %#v, want 
%#v", i, step.name, gotToolCall, step.wantToolCall) + } + } +} + +func TestQwenToolCallValueParsing(t *testing.T) { + cases := []struct { + desc string + raw string + paramType api.PropertyType + want any + }{ + { + desc: "default string value (no type specified)", + paramType: api.PropertyType{}, + raw: "some-string", + want: "some-string", + }, + { + desc: "trim a single leading and trailing newline", + paramType: api.PropertyType{}, + raw: "\nsome-string\n", + want: "some-string", + }, + { + desc: "trim at most one leading and trailing newline", + paramType: api.PropertyType{}, + raw: "\n\nsome-string\n\n", + want: "\nsome-string\n", + }, + { + desc: "newline really has to be the first character to be trimmed", + paramType: api.PropertyType{}, + raw: " \nsome-string\n ", + want: " \nsome-string\n ", + }, + { + desc: "numeric type", + paramType: api.PropertyType{"number"}, + raw: "123", + want: 123, + }, + // Integer parsing tests + { + desc: "integer type", + paramType: api.PropertyType{"integer"}, + raw: "42", + want: 42, + }, + { + desc: "negative integer", + paramType: api.PropertyType{"integer"}, + raw: "-100", + want: -100, + }, + { + desc: "zero integer", + paramType: api.PropertyType{"integer"}, + raw: "0", + want: 0, + }, + { + desc: "integer with leading zeros", + paramType: api.PropertyType{"integer"}, + raw: "007", + want: 7, + }, + { + desc: "large integer", + paramType: api.PropertyType{"integer"}, + raw: "2147483648", // Just beyond int32 max + want: int64(2147483648), + }, + // Float/number parsing tests + { + desc: "float type", + paramType: api.PropertyType{"number"}, + raw: "3.14", + want: 3.14, + }, + { + desc: "negative float", + paramType: api.PropertyType{"number"}, + raw: "-273.15", + want: -273.15, + }, + { + desc: "float without decimal part", + paramType: api.PropertyType{"number"}, + raw: "100.0", + want: 100, + }, + { + desc: "scientific notation positive", + paramType: api.PropertyType{"number"}, + raw: "1.23e5", + want: 123000, // 
Will be int since it has no decimal part + }, + { + desc: "scientific notation negative", + paramType: api.PropertyType{"number"}, + raw: "1.5e-3", + want: 0.0015, + }, + { + desc: "very small float", + paramType: api.PropertyType{"number"}, + raw: "0.00000001", + want: 0.00000001, + }, + // String parsing tests + { + desc: "explicit string type", + paramType: api.PropertyType{"string"}, + raw: "hello world", + want: "hello world", + }, + { + desc: "string with special characters", + paramType: api.PropertyType{"string"}, + raw: "/usr/local/bin/test-file_v2.0.sh", + want: "/usr/local/bin/test-file_v2.0.sh", + }, + { + desc: "string with quotes", + paramType: api.PropertyType{"string"}, + raw: `He said "hello" to me`, + want: `He said "hello" to me`, + }, + { + desc: "multiline string", + paramType: api.PropertyType{"string"}, + raw: "line one\nline two\nline three", + want: "line one\nline two\nline three", + }, + { + desc: "empty string", + paramType: api.PropertyType{"string"}, + raw: "", + want: "", + }, + { + desc: "string that looks like a number", + paramType: api.PropertyType{"string"}, + raw: "12345", + want: "12345", + }, + // Boolean parsing tests + { + desc: "boolean true", + paramType: api.PropertyType{"boolean"}, + raw: "true", + want: true, + }, + { + desc: "boolean false", + paramType: api.PropertyType{"boolean"}, + raw: "false", + want: false, + }, + { + desc: "boolean case insensitive true", + paramType: api.PropertyType{"boolean"}, + raw: "True", + want: true, + }, + { + desc: "boolean case insensitive false", + paramType: api.PropertyType{"boolean"}, + raw: "FALSE", + want: false, + }, + // Null parsing tests + { + desc: "null value lowercase", + paramType: api.PropertyType{"string"}, + raw: "null", + want: nil, + }, + { + desc: "null value case insensitive", + paramType: api.PropertyType{"integer"}, + raw: "NULL", + want: nil, + }, + // Array parsing tests + { + desc: "array of strings", + paramType: api.PropertyType{"array"}, + raw: `["foo", 
"bar", "baz"]`, + want: []any{"foo", "bar", "baz"}, + }, + { + desc: "array of numbers", + paramType: api.PropertyType{"array"}, + raw: `[1, 2.5, 3]`, + want: []any{float64(1), 2.5, float64(3)}, + }, + { + desc: "array of mixed types", + paramType: api.PropertyType{"array"}, + raw: `["string", 123, true, null]`, + want: []any{"string", float64(123), true, nil}, + }, + { + desc: "empty array", + paramType: api.PropertyType{"array"}, + raw: `[]`, + want: []any{}, + }, + // Object parsing tests + { + desc: "simple object", + paramType: api.PropertyType{"object"}, + raw: `{"key": "value", "number": 42}`, + want: map[string]any{"key": "value", "number": float64(42)}, + }, + { + desc: "nested object", + paramType: api.PropertyType{"object"}, + raw: `{"outer": {"inner": "value"}}`, + want: map[string]any{"outer": map[string]any{"inner": "value"}}, + }, + { + desc: "empty object", + paramType: api.PropertyType{"object"}, + raw: `{}`, + want: map[string]any{}, + }, + // Error cases and fallback behavior + { + desc: "invalid integer falls back to string", + paramType: api.PropertyType{"integer"}, + raw: "not-a-number", + want: "not-a-number", + }, + { + desc: "invalid float falls back to string", + paramType: api.PropertyType{"number"}, + raw: "3.14.159", + want: "3.14.159", + }, + { + desc: "invalid boolean falls back to false", + paramType: api.PropertyType{"boolean"}, + raw: "yes", + want: false, + }, + { + desc: "invalid JSON array falls back to string", + paramType: api.PropertyType{"array"}, + raw: "[1, 2, unclosed", + want: "[1, 2, unclosed", + }, + { + desc: "invalid JSON object falls back to string", + paramType: api.PropertyType{"object"}, + raw: `{"key": unclosed`, + want: `{"key": unclosed`, + }, + // Edge cases + { + desc: "integer overflow should use int64", + paramType: api.PropertyType{"integer"}, + raw: "2147483648", // Beyond int32 max + want: int64(2147483648), + }, + { + desc: "float with many decimal places", + paramType: api.PropertyType{"number"}, + 
raw: "3.141592653589793", + want: 3.141592653589793, + }, + { + desc: "string with JSON-like content", + paramType: api.PropertyType{"string"}, + raw: `{"this": "is", "just": "a string"}`, + want: `{"this": "is", "just": "a string"}`, + }, + { + desc: "whitespace-only string", + paramType: api.PropertyType{"string"}, + raw: " ", + want: " ", + }, + // Unknown parameter (no type specified in tools) + { + desc: "parameter not in tool definition defaults to string", + paramType: api.PropertyType{}, + raw: "some value", + want: "some value", + }, + // Union type tests + { + desc: "string or number union - valid number", + paramType: api.PropertyType{"string", "number"}, + raw: "42.5", + want: 42.5, + }, + { + desc: "string or number union - non-numeric string", + paramType: api.PropertyType{"string", "number"}, + raw: "hello", + want: "hello", + }, + { + desc: "number or string union - valid number (order shouldn't matter)", + paramType: api.PropertyType{"number", "string"}, + raw: "42.5", + want: 42.5, + }, + { + desc: "integer or null union - valid integer", + paramType: api.PropertyType{"integer", "null"}, + raw: "123", + want: 123, + }, + { + desc: "integer or null union - null value", + paramType: api.PropertyType{"integer", "null"}, + raw: "null", + want: nil, + }, + { + desc: "null or integer union - null value (order shouldn't matter)", + paramType: api.PropertyType{"null", "integer"}, + raw: "null", + want: nil, + }, + { + desc: "boolean or string union - valid boolean", + paramType: api.PropertyType{"boolean", "string"}, + raw: "true", + want: true, + }, + { + desc: "boolean or string union - non-boolean becomes string", + paramType: api.PropertyType{"boolean", "string"}, + raw: "yes", + want: "yes", + }, + { + desc: "string or boolean union - valid boolean (precedence test)", + paramType: api.PropertyType{"string", "boolean"}, + raw: "false", + want: false, // Should be boolean, not string "false" + }, + { + desc: "integer or number union - integer value", + 
paramType: api.PropertyType{"integer", "number"}, + raw: "42", + want: 42, + }, + { + desc: "integer or number union - float value", + paramType: api.PropertyType{"integer", "number"}, + raw: "42.5", + want: 42.5, + }, + { + desc: "number or integer union - integer value (precedence test)", + paramType: api.PropertyType{"number", "integer"}, + raw: "42", + want: 42, // Should try integer first due to precedence + }, + { + desc: "array or object union - valid array", + paramType: api.PropertyType{"array", "object"}, + raw: `[1, 2, 3]`, + want: []any{float64(1), float64(2), float64(3)}, + }, + { + desc: "array or object union - valid object", + paramType: api.PropertyType{"array", "object"}, + raw: `{"key": "value"}`, + want: map[string]any{"key": "value"}, + }, + { + desc: "object or array union - valid array (precedence test)", + paramType: api.PropertyType{"object", "array"}, + raw: `[1, 2, 3]`, + want: []any{float64(1), float64(2), float64(3)}, + }, + { + desc: "complex multi-type union - null", + paramType: api.PropertyType{"string", "number", "boolean", "null"}, + raw: "null", + want: nil, + }, + { + desc: "complex multi-type union - boolean", + paramType: api.PropertyType{"string", "number", "boolean", "null"}, + raw: "true", + want: true, + }, + { + desc: "complex multi-type union - number", + paramType: api.PropertyType{"string", "number", "boolean", "null"}, + raw: "3.14", + want: 3.14, + }, + { + desc: "complex multi-type union - string", + paramType: api.PropertyType{"string", "number", "boolean", "null"}, + raw: "hello", + want: "hello", + }, + { + desc: "integer string union - integer string becomes integer", + paramType: api.PropertyType{"integer", "string"}, + raw: "123", + want: 123, + }, + { + desc: "string integer union - integer string becomes integer (precedence)", + paramType: api.PropertyType{"string", "integer"}, + raw: "123", + want: 123, // Integer has higher precedence than string + }, + } + + for _, tc := range cases { + t.Run(tc.desc, 
// renderAdditionalKeys renders every top-level JSON field of obj whose key is
// not listed in handledKeys, each as "\n<key>value</key>".
//
// obj is marshalled to JSON and the fields are emitted in the order they
// appear in that encoding: struct fields in declaration order and map keys in
// sorted order. Walking the encoded document (instead of ranging over a Go
// map, whose iteration order is random) keeps the rendered prompt identical
// from one call to the next.
//
// Object and array values are written as compact JSON, null values are
// skipped, and every other value is formatted with fmt's %v verb.
//
// It returns "" when obj cannot be marshalled or does not encode to a JSON
// object.
func renderAdditionalKeys(obj any, handledKeys map[string]bool) string {
	data, err := json.Marshal(obj)
	if err != nil {
		return ""
	}

	dec := json.NewDecoder(strings.NewReader(string(data)))
	// Anything other than an opening brace (null, array, scalar) has no keys
	// to render.
	if tok, err := dec.Token(); err != nil || tok != json.Delim('{') {
		return ""
	}

	var sb strings.Builder
	for dec.More() {
		tok, err := dec.Token()
		if err != nil {
			return ""
		}
		key, ok := tok.(string)
		if !ok {
			return ""
		}

		// The value must be consumed even for handled keys so the decoder
		// stays positioned on the next key.
		var value any
		if err := dec.Decode(&value); err != nil {
			return ""
		}
		if handledKeys[key] {
			continue
		}

		switch v := value.(type) {
		case nil:
			continue
		case map[string]any, []any:
			// TODO(drifkin): it would be nice to format the JSON here similarly to
			// python's default json.dumps behavior (spaces after commas and colons).
			// This would let us be byte-for-byte compatible with the reference
			// implementation for most common inputs
			jsonBytes, err := json.Marshal(v)
			if err != nil {
				continue
			}
			sb.WriteString("\n<" + key + ">" + string(jsonBytes) + "</" + key + ">")
		default:
			// Simple types, convert to string
			sb.WriteString("\n<" + key + ">" + fmt.Sprintf("%v", v) + "</" + key + ">")
		}
	}

	return sb.String()
}
+ } + + sb.WriteString(systemMessage) + + if len(tools) > 0 { + sb.WriteString("\n\n# Tools\n\nYou have access to the following functions:\n\n") + sb.WriteString("") + for _, tool := range tools { + sb.WriteString("\n") + sb.WriteString("\n") + sb.WriteString("" + tool.Function.Name + "") + if tool.Function.Description != "" { + sb.WriteString("\n" + tool.Function.Description + "") + } + sb.WriteString("\n") + + for name, prop := range tool.Function.Parameters.Properties { + sb.WriteString("\n") + sb.WriteString("\n" + name + "") + + if len(prop.Type) > 0 { + // TODO(!!!)(drifkin): we should match the reference implementation for + // more complex types here instead of using this format + sb.WriteString("\n" + prop.ToTypeScriptType() + "") + } + + if prop.Description != "" { + sb.WriteString("\n" + prop.Description + "") + } + + // Render any additional keys not already handled + handledKeys := map[string]bool{ + "type": true, + "description": true, + } + sb.WriteString(renderAdditionalKeys(prop, handledKeys)) + + sb.WriteString("\n") + } + + // Render extra keys for parameters (everything except 'type' and 'properties') + paramHandledKeys := map[string]bool{ + "type": true, + "properties": true, + } + sb.WriteString(renderAdditionalKeys(tool.Function.Parameters, paramHandledKeys)) + + sb.WriteString("\n") + sb.WriteString("\n") + } + sb.WriteString("\n") + sb.WriteString("\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about 
// formatToolCallArgument converts a single tool-call argument value into the
// text placed inside its parameter block of the rendered prompt.
//
//   - nil renders as "null".
//   - Strings and byte slices are written verbatim, without quotes.
//   - Maps, slices, arrays and floating-point numbers are JSON encoded. HTML
//     escaping is disabled because the output is prompt text, not HTML, so
//     characters such as '<', '>' and '&' must reach the model unchanged.
//     Floats use JSON number formatting so that a JSON-decoded 1000000 is
//     rendered as "1000000" rather than fmt's "1e+06".
//   - Everything else (and any value that fails to encode) falls back to
//     fmt's %v formatting.
func formatToolCallArgument(value any) string {
	switch v := value.(type) {
	case nil:
		return "null"
	case string:
		return v
	case []byte:
		return string(v)
	}

	// value is a non-nil interface here, so reflect.TypeOf cannot return nil.
	switch reflect.TypeOf(value).Kind() {
	case reflect.Map, reflect.Slice, reflect.Array, reflect.Float32, reflect.Float64:
		var sb strings.Builder
		enc := json.NewEncoder(&sb)
		enc.SetEscapeHTML(false)
		if err := enc.Encode(value); err == nil {
			// Encode always appends a trailing newline; drop it.
			return strings.TrimSuffix(sb.String(), "\n")
		}
	}

	return fmt.Sprintf("%v", value)
}
What about New York?"}, + }, + tools: []api.Tool{ + {Function: api.ToolFunction{ + Name: "get_weather", + Description: "Get the current weather in a given location", + Parameters: api.ToolFunctionParameters{ + Required: []string{"unit"}, + Properties: map[string]api.ToolProperty{ + "unit": {Type: api.PropertyType{"string"}, Enum: []any{"celsius", "fahrenheit"}, Description: "The unit of temperature"}, + // TODO(drifkin): add multiple params back once we have predictable + // order via some sort of ordered map type (see + // ) + /* + "location": {Type: api.PropertyType{"string"}, Description: "The city and state, e.g. San Francisco, CA"}, + */ + }, + }, + }}, + }, + expected: `<|im_start|>system +You are a helpful assistant with access to tools. + +# Tools + +You have access to the following functions: + + + +get_weather +Get the current weather in a given location + + +unit +string +The unit of temperature +["celsius","fahrenheit"] + +["unit"] + + + + +If you choose to call a function ONLY reply in the following format with NO suffix: + + + + +value_1 + + +This is the value for the second parameter +that can span +multiple lines + + + + + +Reminder: +- Function calls MUST follow the specified format: an inner block must be nested within XML tags +- Required parameters MUST be specified +- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after +- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls +<|im_end|> +<|im_start|>user +What is the weather like in San Francisco?<|im_end|> +<|im_start|>assistant +I'll check the weather in San Francisco for you. + + + + +fahrenheit + + +<|im_end|> +<|im_start|>user + +{"location": "San Francisco, CA", "temperature": 68, "condition": "partly cloudy", "humidity": 65, "wind_speed": 12} + +<|im_end|> +<|im_start|>user +That sounds nice! 
What about New York?<|im_end|> +<|im_start|>assistant +`, + }, + { + name: "parallel tool calls", + msgs: []api.Message{ + {Role: "system", Content: "You are a helpful assistant with access to tools."}, + {Role: "user", Content: "call double(1) and triple(2)"}, + {Role: "assistant", Content: "I'll call double(1) and triple(2) for you.", ToolCalls: []api.ToolCall{ + {Function: api.ToolCallFunction{Name: "double", Arguments: map[string]any{"number": "1"}}}, + {Function: api.ToolCallFunction{Name: "triple", Arguments: map[string]any{"number": "2"}}}, + }}, + {Role: "tool", Content: "{\"number\": 2}", ToolName: "double"}, + {Role: "tool", Content: "{\"number\": 6}", ToolName: "triple"}, + }, + tools: []api.Tool{ + {Function: api.ToolFunction{Name: "double", Description: "Double a number", Parameters: api.ToolFunctionParameters{Properties: map[string]api.ToolProperty{ + "number": {Type: api.PropertyType{"string"}, Description: "The number to double"}, + }}}}, + {Function: api.ToolFunction{Name: "triple", Description: "Triple a number", Parameters: api.ToolFunctionParameters{Properties: map[string]api.ToolProperty{ + "number": {Type: api.PropertyType{"string"}, Description: "The number to triple"}, + }}}}, + }, + expected: `<|im_start|>system +You are a helpful assistant with access to tools. 
+ +# Tools + +You have access to the following functions: + + + +double +Double a number + + +number +string +The number to double + + + + +triple +Triple a number + + +number +string +The number to triple + + + + + +If you choose to call a function ONLY reply in the following format with NO suffix: + + + + +value_1 + + +This is the value for the second parameter +that can span +multiple lines + + + + + +Reminder: +- Function calls MUST follow the specified format: an inner block must be nested within XML tags +- Required parameters MUST be specified +- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after +- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls +<|im_end|> +<|im_start|>user +call double(1) and triple(2)<|im_end|> +<|im_start|>assistant +I'll call double(1) and triple(2) for you. + + + + +1 + + + + + + +2 + + +<|im_end|> +<|im_start|>user + +{"number": 2} + + +{"number": 6} + +<|im_end|> +<|im_start|>assistant +`, + }, + { + name: "prefill", + msgs: []api.Message{ + {Role: "system", Content: "You are a helpful assistant."}, + {Role: "user", Content: "Tell me something interesting."}, + {Role: "assistant", Content: "I'll tell you something interesting about cats"}, + }, + expected: `<|im_start|>system +You are a helpful assistant.<|im_end|> +<|im_start|>user +Tell me something interesting.<|im_end|> +<|im_start|>assistant +I'll tell you something interesting about cats`, + }, + { + name: "complex tool call arguments should remain json encoded", + msgs: []api.Message{ + {Role: "user", Content: "call tool"}, + {Role: "assistant", ToolCalls: []api.ToolCall{ + {Function: api.ToolCallFunction{ + Name: "echo", + Arguments: map[string]any{ + "payload": map[string]any{"foo": "bar"}, + }, + }}, + }}, + {Role: "tool", Content: "{\"payload\": {\"foo\": \"bar\"}}", ToolName: "echo"}, + }, + expected: 
`<|im_start|>user +call tool<|im_end|> +<|im_start|>assistant + + + + +{"foo":"bar"} + + +<|im_end|> +<|im_start|>user + +{"payload": {"foo": "bar"}} + +<|im_end|> +<|im_start|>assistant +`, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rendered, err := Qwen3CoderRenderer(tt.msgs, tt.tools, nil) + if err != nil { + t.Fatal(err) + } + if diff := cmp.Diff(rendered, tt.expected); diff != "" { + t.Errorf("mismatch (-got +want):\n%s", diff) + } + }) + } +} + +func TestFormatToolCallArgument(t *testing.T) { + tests := []struct { + name string + arg any + expected string + }{ + { + name: "string", + arg: "foo", + // notice no quotes around the string + expected: "foo", + }, + { + name: "map", + arg: map[string]any{"foo": "bar"}, + expected: "{\"foo\":\"bar\"}", + }, + { + name: "number", + arg: 1, + expected: "1", + }, + { + name: "boolean", + arg: true, + expected: "true", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := formatToolCallArgument(tt.arg) + if got != tt.expected { + t.Errorf("formatToolCallArgument(%v) = %v, want %v", tt.arg, got, tt.expected) + } + }) + } +} diff --git a/model/renderers/renderer.go b/model/renderers/renderer.go new file mode 100644 index 0000000000..2dfb51e490 --- /dev/null +++ b/model/renderers/renderer.go @@ -0,0 +1,26 @@ +package renderers + +import ( + "fmt" + + "github.com/ollama/ollama/api" +) + +type rendererFunc func([]api.Message, []api.Tool, *api.ThinkValue) (string, error) + +func RenderWithRenderer(name string, msgs []api.Message, tools []api.Tool, think *api.ThinkValue) (string, error) { + renderer := rendererForName(name) + if renderer == nil { + return "", fmt.Errorf("unknown renderer %q", name) + } + return renderer(msgs, tools, think) +} + +func rendererForName(name string) rendererFunc { + switch name { + case "qwen3-coder": + return Qwen3CoderRenderer + default: + return nil + } +} diff --git a/openai/openai.go b/openai/openai.go index 
b6a8a95e23..7ef5ac6de1 100644 --- a/openai/openai.go +++ b/openai/openai.go @@ -105,16 +105,18 @@ type ChatCompletionRequest struct { Tools []api.Tool `json:"tools"` Reasoning *Reasoning `json:"reasoning,omitempty"` ReasoningEffort *string `json:"reasoning_effort,omitempty"` + DebugRenderOnly bool `json:"_debug_render_only"` } type ChatCompletion struct { - Id string `json:"id"` - Object string `json:"object"` - Created int64 `json:"created"` - Model string `json:"model"` - SystemFingerprint string `json:"system_fingerprint"` - Choices []Choice `json:"choices"` - Usage Usage `json:"usage,omitempty"` + Id string `json:"id"` + Object string `json:"object"` + Created int64 `json:"created"` + Model string `json:"model"` + SystemFingerprint string `json:"system_fingerprint"` + Choices []Choice `json:"choices"` + Usage Usage `json:"usage,omitempty"` + DebugInfo *api.DebugInfo `json:"_debug_info,omitempty"` } type ChatCompletionChunk struct { @@ -141,6 +143,7 @@ type CompletionRequest struct { Temperature *float32 `json:"temperature"` TopP float32 `json:"top_p"` Suffix string `json:"suffix"` + DebugRenderOnly bool `json:"_debug_render_only"` } type Completion struct { @@ -273,8 +276,8 @@ func toChatCompletion(id string, r api.ChatResponse) ChatCompletion { } return nil }(r.DoneReason), - }}, - Usage: toUsage(r), + }}, Usage: toUsage(r), + DebugInfo: r.DebugInfo, } } @@ -568,13 +571,14 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) { } return &api.ChatRequest{ - Model: r.Model, - Messages: messages, - Format: format, - Options: options, - Stream: &r.Stream, - Tools: r.Tools, - Think: think, + Model: r.Model, + Messages: messages, + Format: format, + Options: options, + Stream: &r.Stream, + Tools: r.Tools, + Think: think, + DebugRenderOnly: r.DebugRenderOnly, }, nil } @@ -648,11 +652,12 @@ func fromCompleteRequest(r CompletionRequest) (api.GenerateRequest, error) { } return api.GenerateRequest{ - Model: r.Model, - Prompt: r.Prompt, - Options: 
options, - Stream: &r.Stream, - Suffix: r.Suffix, + Model: r.Model, + Prompt: r.Prompt, + Options: options, + Stream: &r.Stream, + Suffix: r.Suffix, + DebugRenderOnly: r.DebugRenderOnly, }, nil } diff --git a/parser/parser.go b/parser/parser.go index e080f1bb74..c2e8f981f3 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -100,6 +100,10 @@ func (f Modelfile) CreateRequest(relativeDir string) (*api.CreateRequest, error) req.System = c.Args case "license": licenses = append(licenses, c.Args) + case "renderer": + req.Renderer = c.Args + case "parser": + req.Parser = c.Args case "message": role, msg, _ := strings.Cut(c.Args, ": ") messages = append(messages, api.Message{Role: role, Content: msg}) @@ -320,7 +324,7 @@ func (c Command) String() string { switch c.Name { case "model": fmt.Fprintf(&sb, "FROM %s", c.Args) - case "license", "template", "system", "adapter": + case "license", "template", "system", "adapter", "renderer", "parser": fmt.Fprintf(&sb, "%s %s", strings.ToUpper(c.Name), quote(c.Args)) case "message": role, message, _ := strings.Cut(c.Args, ": ") @@ -346,7 +350,7 @@ const ( var ( errMissingFrom = errors.New("no FROM line") errInvalidMessageRole = errors.New("message role must be one of \"system\", \"user\", or \"assistant\"") - errInvalidCommand = errors.New("command must be one of \"from\", \"license\", \"template\", \"system\", \"adapter\", \"parameter\", or \"message\"") + errInvalidCommand = errors.New("command must be one of \"from\", \"license\", \"template\", \"system\", \"adapter\", \"renderer\", \"parser\", \"parameter\", or \"message\"") ) type ParserError struct { @@ -606,7 +610,7 @@ func isValidMessageRole(role string) bool { func isValidCommand(cmd string) bool { switch strings.ToLower(cmd) { - case "from", "license", "template", "system", "adapter", "parameter", "message": + case "from", "license", "template", "system", "adapter", "renderer", "parser", "parameter", "message": return true default: return false diff --git 
a/parser/parser_test.go b/parser/parser_test.go index 7d5a808bad..1524e890a7 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -198,6 +198,34 @@ BADCOMMAND param1 value1 } } +func TestParseFileRenderer(t *testing.T) { + input := ` +FROM foo +RENDERER renderer1 +` + + reader := strings.NewReader(input) + + modelfile, err := ParseFile(reader) + require.NoError(t, err) + + assert.Equal(t, []Command{{Name: "model", Args: "foo"}, {Name: "renderer", Args: "renderer1"}}, modelfile.Commands) +} + +func TestParseFileParser(t *testing.T) { + input := ` +FROM foo +PARSER parser1 +` + + reader := strings.NewReader(input) + + modelfile, err := ParseFile(reader) + require.NoError(t, err) + + assert.Equal(t, []Command{{Name: "model", Args: "foo"}, {Name: "parser", Args: "parser1"}}, modelfile.Commands) +} + func TestParseFileMessages(t *testing.T) { cases := []struct { input string diff --git a/server/create.go b/server/create.go index bd970876f0..f08f18b340 100644 --- a/server/create.go +++ b/server/create.go @@ -323,6 +323,8 @@ func createModel(r api.CreateRequest, name model.Name, baseLayers []*layerGGML, RootFS: RootFS{ Type: "layers", }, + Renderer: r.Renderer, + Parser: r.Parser, } var layers []Layer diff --git a/server/images.go b/server/images.go index 504eb95cf7..6432860f8e 100644 --- a/server/images.go +++ b/server/images.go @@ -24,6 +24,7 @@ import ( "github.com/ollama/ollama/api" "github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/fs/gguf" + "github.com/ollama/ollama/model/parsers" "github.com/ollama/ollama/parser" "github.com/ollama/ollama/template" "github.com/ollama/ollama/thinking" @@ -94,8 +95,9 @@ func (m *Model) Capabilities() []model.Capability { return capabilities } + builtinParser := parsers.ParserForName(m.Config.Parser) // Check for tools capability - if slices.Contains(m.Template.Vars(), "tools") { + if slices.Contains(m.Template.Vars(), "tools") || (builtinParser != nil && builtinParser.HasToolSupport()) { capabilities = 
append(capabilities, model.CapabilityTools) } @@ -112,7 +114,8 @@ func (m *Model) Capabilities() []model.Capability { // Check for thinking capability openingTag, closingTag := thinking.InferTags(m.Template.Template) hasTags := openingTag != "" && closingTag != "" - if hasTags || slices.Contains([]string{"gptoss", "gpt-oss"}, m.Config.ModelFamily) { + isGptoss := slices.Contains([]string{"gptoss", "gpt-oss"}, m.Config.ModelFamily) + if hasTags || isGptoss || (builtinParser != nil && builtinParser.HasThinkingSupport()) { capabilities = append(capabilities, model.CapabilityThinking) } @@ -198,6 +201,20 @@ func (m *Model) String() string { }) } + if m.Config.Renderer != "" { + modelfile.Commands = append(modelfile.Commands, parser.Command{ + Name: "renderer", + Args: m.Config.Renderer, + }) + } + + if m.Config.Parser != "" { + modelfile.Commands = append(modelfile.Commands, parser.Command{ + Name: "parser", + Args: m.Config.Parser, + }) + } + for k, v := range m.Options { switch v := v.(type) { case []any: @@ -238,6 +255,8 @@ type ConfigV2 struct { ModelFamilies []string `json:"model_families"` ModelType string `json:"model_type"` FileType string `json:"file_type"` + Renderer string `json:"renderer,omitempty"` + Parser string `json:"parser,omitempty"` // required by spec Architecture string `json:"architecture"` diff --git a/server/prompt.go b/server/prompt.go index f1d8020ea6..56bc63030b 100644 --- a/server/prompt.go +++ b/server/prompt.go @@ -11,6 +11,7 @@ import ( "github.com/ollama/ollama/api" "github.com/ollama/ollama/llm" + "github.com/ollama/ollama/model/renderers" "github.com/ollama/ollama/template" ) @@ -41,18 +42,12 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api. 
} } - thinkVal := false - thinkLevel := "" - if think != nil { - thinkVal = think.Bool() - thinkLevel = think.String() - } - var b bytes.Buffer - if err := m.Template.Execute(&b, template.Values{Messages: append(system, msgs[i:]...), Tools: tools, Think: thinkVal, ThinkLevel: thinkLevel, IsThinkSet: think != nil}); err != nil { + p, err := renderPrompt(m, append(system, msgs[i:]...), tools, think) + if err != nil { return "", nil, err } - s, err := tokenize(ctx, b.String()) + s, err := tokenize(ctx, p) if err != nil { return "", nil, err } @@ -101,6 +96,23 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api. } // truncate any messages that do not fit into the context window + p, err := renderPrompt(m, append(system, msgs[currMsgIdx:]...), tools, think) + if err != nil { + return "", nil, err + } + + return p, images, nil +} + +func renderPrompt(m *Model, msgs []api.Message, tools []api.Tool, think *api.ThinkValue) (string, error) { + if m.Config.Renderer != "" { + rendered, err := renderers.RenderWithRenderer(m.Config.Renderer, msgs, tools, think) + if err != nil { + return "", err + } + return rendered, nil + } + var b bytes.Buffer thinkVal := false thinkLevel := "" @@ -108,9 +120,8 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api. 
thinkVal = think.Bool() thinkLevel = think.String() } - if err := m.Template.Execute(&b, template.Values{Messages: append(system, msgs[currMsgIdx:]...), Tools: tools, Think: thinkVal, ThinkLevel: thinkLevel, IsThinkSet: think != nil}); err != nil { - return "", nil, err + if err := m.Template.Execute(&b, template.Values{Messages: msgs, Tools: tools, Think: thinkVal, ThinkLevel: thinkLevel, IsThinkSet: think != nil}); err != nil { + return "", err } - - return b.String(), images, nil + return b.String(), nil } diff --git a/server/routes.go b/server/routes.go index 739ce69da1..e999c6c01e 100644 --- a/server/routes.go +++ b/server/routes.go @@ -35,6 +35,7 @@ import ( "github.com/ollama/ollama/harmony" "github.com/ollama/ollama/llm" "github.com/ollama/ollama/logutil" + "github.com/ollama/ollama/model/parsers" "github.com/ollama/ollama/openai" "github.com/ollama/ollama/server/internal/client/ollama" "github.com/ollama/ollama/server/internal/registry" @@ -329,10 +330,10 @@ func (s *Server) GenerateHandler(c *gin.Context) { // If debug mode is enabled, return the rendered template instead of calling the model if req.DebugRenderOnly { - c.JSON(http.StatusOK, api.DebugTemplateResponse{ + c.JSON(http.StatusOK, api.GenerateResponse{ Model: req.Model, CreatedAt: time.Now().UTC(), - DebugInfo: api.DebugInfo{ + DebugInfo: &api.DebugInfo{ RenderedTemplate: prompt, ImageCount: len(images), }, @@ -1625,10 +1626,15 @@ func (s *Server) ChatHandler(c *gin.Context) { } msgs = filterThinkTags(msgs, m) + var builtinParser parsers.Parser + if m.Config.Parser != "" { + builtinParser = parsers.ParserForName(m.Config.Parser) + } + var harmonyMessageHandler *harmony.HarmonyMessageHandler var harmonyToolParser *harmony.HarmonyToolCallAccumulator - useHarmony := shouldUseHarmony(m) + useHarmony := shouldUseHarmony(m) || m.Config.Parser == "harmony" processedTools := req.Tools if useHarmony { @@ -1658,10 +1664,10 @@ func (s *Server) ChatHandler(c *gin.Context) { // If debug mode is enabled, 
return the rendered template instead of calling the model if req.DebugRenderOnly { - c.JSON(http.StatusOK, api.DebugTemplateResponse{ + c.JSON(http.StatusOK, api.ChatResponse{ Model: req.Model, CreatedAt: time.Now().UTC(), - DebugInfo: api.DebugInfo{ + DebugInfo: &api.DebugInfo{ RenderedTemplate: prompt, ImageCount: len(images), }, @@ -1721,6 +1727,7 @@ func (s *Server) ChatHandler(c *gin.Context) { res.LoadDuration = checkpointLoaded.Sub(checkpointStart) } + // TODO(drifkin): fold this as much as possible into the generic m.Config.Parser logic if useHarmony { content, thinking, toolContent := harmonyMessageHandler.AddContent(r.Content, harmonyToolParser) res.Message.Content = content @@ -1747,6 +1754,27 @@ func (s *Server) ChatHandler(c *gin.Context) { ch <- res } + return + } else if builtinParser != nil { + slog.Log(context.TODO(), logutil.LevelTrace, "builtin parser input", "parser", m.Config.Parser, "content", r.Content) + + content, thinking, toolCalls, err := builtinParser.Add(r.Content, req.Tools) + if err != nil { + ch <- gin.H{"error": err.Error()} + return + } + + res.Message.Content = content + res.Message.Thinking = thinking + res.Message.ToolCalls = toolCalls + + if res.Message.Content != "" || res.Message.Thinking != "" || len(res.Message.ToolCalls) > 0 || r.Done { + slog.Log(context.TODO(), logutil.LevelTrace, "builtin parser output", "parser", m.Config.Parser, "content", content, "thinking", thinking, "toolCalls", toolCalls, "done", r.Done) + ch <- res + } else { + slog.Log(context.TODO(), logutil.LevelTrace, "builtin parser empty output", "parser", m.Config.Parser) + } + return } diff --git a/server/routes_debug_test.go b/server/routes_debug_test.go index f04a1da99b..6507284ef7 100644 --- a/server/routes_debug_test.go +++ b/server/routes_debug_test.go @@ -180,7 +180,7 @@ func TestGenerateDebugRenderOnly(t *testing.T) { t.Errorf("expected status %d, got %d, body: %s", http.StatusOK, w.Code, w.Body.String()) } - var response api.DebugTemplateResponse 
+ var response api.GenerateResponse if err := json.Unmarshal(w.Body.Bytes(), &response); err != nil { t.Fatalf("failed to unmarshal response: %v", err) } @@ -385,7 +385,7 @@ func TestChatDebugRenderOnly(t *testing.T) { t.Errorf("expected status %d, got %d, body: %s", http.StatusOK, w.Code, w.Body.String()) } - var response api.DebugTemplateResponse + var response api.ChatResponse if err := json.Unmarshal(w.Body.Bytes(), &response); err != nil { t.Fatalf("failed to unmarshal response: %v", err) }