From 242df70a759695dc258c00f56fa373ab5c06b8b2 Mon Sep 17 00:00:00 2001 From: Devon Rifkin Date: Sat, 20 Sep 2025 12:10:58 -0700 Subject: [PATCH] parsers: fix `&`s in qwen3coder parameter values In we that the model will output tool calls such as ``` pwd && ls -la ``` We parse this using the approach of transforming into valid xml and then using an xml parser. While we do transform the function and parameter names, we weren't escaping the parameter values (which in this example are invalid since `pwd && ls -la` contains unescaped ampersands). This has been fixed by first transforming the tags in the same way, and then walking the transformed string and escaping the text in between the tags. This also fixes a case where `<` in the middle of a parameter value would cause an xml parse failure. Fixes: #12357 --- model/parsers/qwen3coder.go | 41 +++++++++++++++++++++++++-- model/parsers/qwen3coder_test.go | 48 ++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 2 deletions(-) diff --git a/model/parsers/qwen3coder.go b/model/parsers/qwen3coder.go index b0e8ec48cd..a7838fb78b 100644 --- a/model/parsers/qwen3coder.go +++ b/model/parsers/qwen3coder.go @@ -393,18 +393,55 @@ func parseValue(raw string, paramType api.PropertyType) any { return raw } -var qwenTagRegex = regexp.MustCompile(`<(\w+)=([^>]+)>`) +var ( + qwenTagRegex = regexp.MustCompile(`<(\w+)=([^>]+)>`) + qwenXMLTagRegex = regexp.MustCompile(``) +) // transformToXML transforms a raw qwen tool call with xml-like tags into valid // xml so that it can be parsed by any xml parser func transformToXML(raw string) string { // take the form `` and transform it to ``, taking // care to properly escape the string that becomes the attribute value - return qwenTagRegex.ReplaceAllStringFunc(raw, func(match string) string { + transformed := qwenTagRegex.ReplaceAllStringFunc(raw, func(match string) string { groups := qwenTagRegex.FindStringSubmatch(match) tag := groups[1] var escapedValue strings.Builder xml.EscapeText(&escapedValue, []byte(groups[2])) return fmt.Sprintf(`<%s name="%s">`, tag, escapedValue.String()) }) + + // Walk the resulting string, escaping any character data that sits between the + // xml tags we just emitted + var out strings.Builder + lastIdx := 0 + for _, loc := range qwenXMLTagRegex.FindAllStringIndex(transformed, -1) { + if loc[0] > lastIdx { + escapeTextNode(&out, transformed[lastIdx:loc[0]]) + } + out.WriteString(transformed[loc[0]:loc[1]]) + lastIdx = loc[1] + } + if lastIdx < len(transformed) { + escapeTextNode(&out, transformed[lastIdx:]) + } + + return out.String() +} + +// escapeTextNode escapes XML character data without altering other characters +// like newlines or tabs (which is why we don't use xml.EscapeText for this) +func escapeTextNode(sb *strings.Builder, s string) { + for _, r := range s { + switch r { + case '&': + sb.WriteString("&") + case '<': + sb.WriteString("<") + case '>': + sb.WriteString(">") + default: + sb.WriteRune(r) + } + } } diff --git a/model/parsers/qwen3coder_test.go b/model/parsers/qwen3coder_test.go index 2389c77b58..43823e6fc6 100644 --- a/model/parsers/qwen3coder_test.go +++ b/model/parsers/qwen3coder_test.go @@ -312,6 +312,41 @@ true }, }, }, + // regression test for + { + name: "ampersands in parameter values", + tools: []api.Tool{}, + rawToolCall: ` + +ls && echo "done" + +`, + wantToolCall: api.ToolCall{ + Function: api.ToolCallFunction{ + Name: "exec", + Arguments: map[string]any{ + "command": "ls && echo \"done\"", + }, + }, + }, + }, + { + name: "angle brackets in parameter values", + tools: []api.Tool{}, + rawToolCall: ` + +ls && echo "a > b and a < b" + +`, + wantToolCall: api.ToolCall{ + Function: api.ToolCallFunction{ + Name: "exec", + Arguments: map[string]any{ + "command": "ls && echo \"a > b and a < b\"", + }, + }, + }, + }, } for i, step := range steps { @@ -798,6 +833,19 @@ celsius `, }, + { + desc: "ampersands in parameter values", + raw: ` + + San Francisco & San Jose + + `, + want: ` + + San Francisco & San Jose + + `, + }, } for _, tc := range cases {