From bdd9d22dfd9798cad0b17e812e251f9af4c30f12 Mon Sep 17 00:00:00 2001 From: Jeffrey Morgan Date: Sun, 20 Jul 2025 14:55:14 -0700 Subject: [PATCH] tools: fix parsing issue when a tool name is a substring of another (#11456) Co-authored-by: frob --- tools/tools.go | 85 ++++++++++++---- tools/tools_test.go | 242 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 310 insertions(+), 17 deletions(-) diff --git a/tools/tools.go b/tools/tools.go index f883bf2842..c149885f6d 100644 --- a/tools/tools.go +++ b/tools/tools.go @@ -115,21 +115,7 @@ func (p *Parser) findTag() (int, bool) { // parseToolCall finds the next complete tool call in the buffer // incrementing n and advancing the buffer. func (p *Parser) parseToolCall() *api.ToolCall { - var tool *api.Tool - var end int = len(p.buffer) - var i int - - // find tool name - for _, t := range p.tools { - n := t.Function.Name - if i = bytes.Index(p.buffer, []byte(n)); i != -1 { - if i+len(n) < end { - tool = &t - end = i + len(n) - } - } - } - + tool, end := findTool(p.tools, p.buffer) if tool == nil { return nil } @@ -139,10 +125,10 @@ func (p *Parser) parseToolCall() *api.ToolCall { // parsing arguments before the tool name, which may be needed in the future args := map[string]any{} if len(tool.Function.Parameters.Properties) > 0 { + var i int if args, i = findArguments(*tool, p.buffer[end:]); args == nil { return nil } - end += i } @@ -159,9 +145,74 @@ func (p *Parser) parseToolCall() *api.ToolCall { return tc } +// findTool finds the first tool name in the list that matches the +// beginning of the buffer, returning nil if no tool is found +// or if the buffer ends with a partial tool name since we need +// to wait for more data to disambiguate. +// The second return value is the end position of the tool name +// if one is found, otherwise 0. +func findTool(tools []api.Tool, buf []byte) (*api.Tool, int) { + if len(buf) == 0 { + return nil, 0 + } + + // check if buffer ends with a partial tool name + // this prevents matching "get" when seeing "get_weather" + var longest string + for _, t := range tools { + if len(t.Function.Name) > len(longest) { + longest = t.Function.Name + } + } + + // Only check up to longest characters from the end + for i := 1; i <= min(len(buf), len(longest)); i++ { + tail := buf[len(buf)-i:] + for _, t := range tools { + name := []byte(t.Function.Name) + if len(tail) < len(name) && bytes.HasPrefix(name, tail) { + return nil, 0 + } + } + } + + // find first occurrence of the longest tool name + var found *api.Tool + start := -1 + end := -1 + + for i := range tools { + name := []byte(tools[i].Function.Name) + pos := bytes.Index(buf, name) + if pos == -1 { + continue + } + + // Skip if we have a better match already + if start != -1 { + if pos > start { + continue + } + if pos == start && len(name) <= len(found.Function.Name) { + continue + } + } + + found = &tools[i] + start = pos + end = pos + len(name) + } + + if found != nil { + return found, end + } + + return nil, 0 +} + // findArguments returns the first object that appears to be // arguments for the provided tool in the provided buffer, -// returning nil if no arguments are found. +// returning nil if no arguments are found and the end position // TODO (jmorganca): this does not support parsing omitted arguments // objects for functions that have all-optional parameters // e.g. `{"name": "get_conditions", "arguments": {}}` will work but diff --git a/tools/tools_test.go b/tools/tools_test.go index 8418ab6c31..092ae32332 100644 --- a/tools/tools_test.go +++ b/tools/tools_test.go @@ -112,6 +112,81 @@ func TestParser(t *testing.T) { Description: "Say hello", }, }, + { + Type: "function", + Function: api.ToolFunction{ + Name: "say_hello_world", + Description: "Say hello world", + }, + }, + { + Type: "function", + Function: api.ToolFunction{ + Name: "get_address", + Description: "Get the address of a given location", + Parameters: struct { + Type string `json:"type"` + Defs any `json:"$defs,omitempty"` + Items any `json:"items,omitempty"` + Required []string `json:"required"` + Properties map[string]struct { + Type api.PropertyType `json:"type"` + Items any `json:"items,omitempty"` + Description string `json:"description"` + Enum []any `json:"enum,omitempty"` + } `json:"properties"` + }{ + Type: "object", + Properties: map[string]struct { + Type api.PropertyType `json:"type"` + Items any `json:"items,omitempty"` + Description string `json:"description"` + Enum []any `json:"enum,omitempty"` + }{ + "location": { + Type: api.PropertyType{"string"}, + Description: "The location to get the address for", + }, + }, + }, + }, + }, + { + Type: "function", + Function: api.ToolFunction{ + Name: "add", + Description: "Add two numbers", + Parameters: struct { + Type string `json:"type"` + Defs any `json:"$defs,omitempty"` + Items any `json:"items,omitempty"` + Required []string `json:"required"` + Properties map[string]struct { + Type api.PropertyType `json:"type"` + Items any `json:"items,omitempty"` + Description string `json:"description"` + Enum []any `json:"enum,omitempty"` + } `json:"properties"` + }{ + Type: "object", + Properties: map[string]struct { + Type api.PropertyType `json:"type"` + Items any `json:"items,omitempty"` + Description string `json:"description"` + Enum []any `json:"enum,omitempty"` + }{ + "a": { + Type: api.PropertyType{"string"}, + Description: "The first number to add", + }, + "b": { + Type: api.PropertyType{"string"}, + Description: "The second number to add", + }, + }, + }, + }, + }, } tests := []struct { @@ -629,6 +704,173 @@ func TestParser(t *testing.T) { }, }, }, + { + name: "tool name with collision", + inputs: []string{ + "", + "{", + "\"name\": \"say_hello", + "_world\",", + "}", + "}", + }, + content: "", + tmpl: qwen, + calls: []api.ToolCall{ + { + Function: api.ToolCallFunction{ + Index: 0, + Name: "say_hello_world", + Arguments: api.ToolCallFunctionArguments{}, + }, + }, + }, + }, + { + name: "tool name with collision multiple", + inputs: []string{ + "", + "{", + "\"name\": \"say_hello", + "_world\",", + "}", + "", + "", + "{", + "\"name\": \"say_hello", + "\",", + "}", + "", + }, + content: "", + tmpl: qwen, + calls: []api.ToolCall{ + { + Function: api.ToolCallFunction{ + Index: 0, + Name: "say_hello_world", + Arguments: api.ToolCallFunctionArguments{}, + }, + }, + { + Function: api.ToolCallFunction{ + Index: 1, + Name: "say_hello", + Arguments: api.ToolCallFunctionArguments{}, + }, + }, + }, + }, + { + name: "tool name with collision non streaming", + inputs: []string{ + `{"name": "say_hello`, + }, + content: "", + tmpl: qwen, + calls: nil, + }, + { + name: "tool name with collision non streaming multiple", + inputs: []string{ + `{"name": "say_hello"}{"name": "say_hello_world"}`, + }, + content: "", + tmpl: qwen, + calls: []api.ToolCall{ + { + Function: api.ToolCallFunction{ + Index: 0, + Name: "say_hello", + Arguments: api.ToolCallFunctionArguments{}, + }, + }, + { + Function: api.ToolCallFunction{ + Index: 1, + Name: "say_hello_world", + Arguments: api.ToolCallFunctionArguments{}, + }, + }, + }, + }, + { + name: "tool name with collision non streaming shorter", + inputs: []string{ + `{"name": "say_hello"}`, + }, + content: "", + tmpl: qwen, + calls: []api.ToolCall{ + { + Function: api.ToolCallFunction{ + Index: 0, + Name: "say_hello", + Arguments: api.ToolCallFunctionArguments{}, + }, + }, + }, + }, + { + name: "tool name with collision non streaming longer", + inputs: []string{ + `{"name": "say_hello_world"}`, + }, + content: "", + tmpl: qwen, + calls: []api.ToolCall{ + { + Function: api.ToolCallFunction{ + Index: 0, + Name: "say_hello_world", + Arguments: api.ToolCallFunctionArguments{}, + }, + }, + }, + }, + { + name: "tool name with substring of another", + inputs: []string{ + "{", + "\"name\": \"get_address\",", + "\"arguments\": {", + "\"location\": \"London\"", + "}", + "}", + }, + content: "", + tmpl: json, + calls: []api.ToolCall{ + { + Function: api.ToolCallFunction{ + Index: 0, + Name: "get_address", + Arguments: api.ToolCallFunctionArguments{ + "location": "London", + }, + }, + }, + }, + }, + { + name: "tool name with substring of another", + inputs: []string{ + `{"name": "get_address", "arguments": {"location": "London"}}`, + }, + content: "", + tmpl: qwen, + calls: []api.ToolCall{ + { + Function: api.ToolCallFunction{ + Index: 0, + Name: "get_address", + Arguments: api.ToolCallFunctionArguments{ + "location": "London", + }, + }, + }, + }, + }, } for _, tt := range tests {