mirror of
https://github.com/ollama/ollama.git
synced 2025-11-10 20:17:59 +01:00
parsers: fix &s in qwen3coder parameter values
In <https://github.com/ollama/ollama/issues/12357> we saw that the model will output tool calls such as ``` <function=shell> <parameter=command> pwd && ls -la </parameter> </function> ``` We parse this using the approach of transforming into valid xml and then using an xml parser. While we do transform the function and parameter names, we weren't escaping the parameter values (which in this example are invalid xml, since `pwd && ls -la` contains unescaped ampersands). This has been fixed by first transforming the tags in the same way, and then walking the transformed string and escaping the text in between the tags. This also fixes a case where `<` in the middle of a parameter value would cause an xml parse failure. Fixes: #12357
This commit is contained in:
@@ -393,18 +393,55 @@ func parseValue(raw string, paramType api.PropertyType) any {
|
|||||||
return raw
|
return raw
|
||||||
}
|
}
|
||||||
|
|
||||||
var qwenTagRegex = regexp.MustCompile(`<(\w+)=([^>]+)>`)
|
var (
|
||||||
|
qwenTagRegex = regexp.MustCompile(`<(\w+)=([^>]+)>`)
|
||||||
|
qwenXMLTagRegex = regexp.MustCompile(`</?(?:function|parameter)(?:\s+name="[^"]*")?>`)
|
||||||
|
)
|
||||||
|
|
||||||
// transformToXML transforms a raw qwen tool call with xml-like tags into valid
|
// transformToXML transforms a raw qwen tool call with xml-like tags into valid
|
||||||
// xml so that it can be parsed by any xml parser
|
// xml so that it can be parsed by any xml parser
|
||||||
func transformToXML(raw string) string {
|
func transformToXML(raw string) string {
|
||||||
// take the form `<tag=abc>` and transform it to `<tag name="abc">`, taking
|
// take the form `<tag=abc>` and transform it to `<tag name="abc">`, taking
|
||||||
// care to properly escape the string that becomes the attribute value
|
// care to properly escape the string that becomes the attribute value
|
||||||
return qwenTagRegex.ReplaceAllStringFunc(raw, func(match string) string {
|
transformed := qwenTagRegex.ReplaceAllStringFunc(raw, func(match string) string {
|
||||||
groups := qwenTagRegex.FindStringSubmatch(match)
|
groups := qwenTagRegex.FindStringSubmatch(match)
|
||||||
tag := groups[1]
|
tag := groups[1]
|
||||||
var escapedValue strings.Builder
|
var escapedValue strings.Builder
|
||||||
xml.EscapeText(&escapedValue, []byte(groups[2]))
|
xml.EscapeText(&escapedValue, []byte(groups[2]))
|
||||||
return fmt.Sprintf(`<%s name="%s">`, tag, escapedValue.String())
|
return fmt.Sprintf(`<%s name="%s">`, tag, escapedValue.String())
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Walk the resulting string, escaping any character data that sits between the
|
||||||
|
// xml tags we just emitted
|
||||||
|
var out strings.Builder
|
||||||
|
lastIdx := 0
|
||||||
|
for _, loc := range qwenXMLTagRegex.FindAllStringIndex(transformed, -1) {
|
||||||
|
if loc[0] > lastIdx {
|
||||||
|
escapeTextNode(&out, transformed[lastIdx:loc[0]])
|
||||||
|
}
|
||||||
|
out.WriteString(transformed[loc[0]:loc[1]])
|
||||||
|
lastIdx = loc[1]
|
||||||
|
}
|
||||||
|
if lastIdx < len(transformed) {
|
||||||
|
escapeTextNode(&out, transformed[lastIdx:])
|
||||||
|
}
|
||||||
|
|
||||||
|
return out.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// escapeTextNode writes s to sb with the three characters that are significant
// in XML character data replaced by their entity references: '&' becomes
// "&amp;", '<' becomes "&lt;" and '>' becomes "&gt;". Every other rune,
// including newlines and tabs, is copied through unchanged (which is why we
// don't use xml.EscapeText for this: it also rewrites whitespace such as "\n"
// and "\t" as numeric character references).
//
// The output is appended to whatever sb already holds; sb is never reset.
func escapeTextNode(sb *strings.Builder, s string) {
	for _, r := range s {
		switch r {
		case '&':
			// must be escaped, otherwise the parser reads it as the start of an
			// entity reference (e.g. the `&&` in `pwd && ls -la`)
			sb.WriteString("&amp;")
		case '<':
			// must be escaped, otherwise the parser reads it as the start of a tag
			sb.WriteString("&lt;")
		case '>':
			sb.WriteString("&gt;")
		default:
			sb.WriteRune(r)
		}
	}
}
|
||||||
|
|||||||
@@ -312,6 +312,41 @@ true
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
// regression test for <https://github.com/ollama/ollama/issues/12357>
|
||||||
|
{
|
||||||
|
name: "ampersands in parameter values",
|
||||||
|
tools: []api.Tool{},
|
||||||
|
rawToolCall: `<function=exec>
|
||||||
|
<parameter=command>
|
||||||
|
ls && echo "done"
|
||||||
|
</parameter>
|
||||||
|
</function>`,
|
||||||
|
wantToolCall: api.ToolCall{
|
||||||
|
Function: api.ToolCallFunction{
|
||||||
|
Name: "exec",
|
||||||
|
Arguments: map[string]any{
|
||||||
|
"command": "ls && echo \"done\"",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "angle brackets in parameter values",
|
||||||
|
tools: []api.Tool{},
|
||||||
|
rawToolCall: `<function=exec>
|
||||||
|
<parameter=command>
|
||||||
|
ls && echo "a > b and a < b"
|
||||||
|
</parameter>
|
||||||
|
</function>`,
|
||||||
|
wantToolCall: api.ToolCall{
|
||||||
|
Function: api.ToolCallFunction{
|
||||||
|
Name: "exec",
|
||||||
|
Arguments: map[string]any{
|
||||||
|
"command": "ls && echo \"a > b and a < b\"",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for i, step := range steps {
|
for i, step := range steps {
|
||||||
@@ -798,6 +833,19 @@ celsius
|
|||||||
</parameter>
|
</parameter>
|
||||||
</function>`,
|
</function>`,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
desc: "ampersands in parameter values",
|
||||||
|
raw: `<function=get_current_temperature>
|
||||||
|
<parameter=location>
|
||||||
|
San Francisco & San Jose
|
||||||
|
</parameter>
|
||||||
|
</function>`,
|
||||||
|
want: `<function name="get_current_temperature">
|
||||||
|
<parameter name="location">
|
||||||
|
San Francisco & San Jose
|
||||||
|
</parameter>
|
||||||
|
</function>`,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tc := range cases {
|
for _, tc := range cases {
|
||||||
|
|||||||
Reference in New Issue
Block a user