From 76eb7d0fff04563ee89e253fc71a4cbf5d0f05f7 Mon Sep 17 00:00:00 2001 From: Patrick Devine Date: Thu, 30 Oct 2025 13:19:21 -0700 Subject: [PATCH] testing: test more models with tool calling (#12867) --- integration/api_test.go | 96 --------------------------- integration/tools_test.go | 132 ++++++++++++++++++++++++++++++++++++++ integration/utils_test.go | 16 +++++ 3 files changed, 148 insertions(+), 96 deletions(-) create mode 100644 integration/tools_test.go diff --git a/integration/api_test.go b/integration/api_test.go index 48572085d8..5d7acd9495 100644 --- a/integration/api_test.go +++ b/integration/api_test.go @@ -408,99 +408,3 @@ func TestAPIEmbeddings(t *testing.T) { t.Errorf("zero length embedding response") } } - -func TestAPIToolCalling(t *testing.T) { - initialTimeout := 60 * time.Second - streamTimeout := 30 * time.Second - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) - defer cancel() - - client, _, cleanup := InitServerConnection(ctx, t) - defer cleanup() - - modelName := "qwen3:0.6b" - if err := PullIfMissing(ctx, client, modelName); err != nil { - t.Fatalf("pull failed %s", err) - } - - tools := []api.Tool{ - { - Type: "function", - Function: api.ToolFunction{ - Name: "get_weather", - Description: "Get the current weather in a given location", - Parameters: api.ToolFunctionParameters{ - Type: "object", - Required: []string{"location"}, - Properties: map[string]api.ToolProperty{ - "location": { - Type: api.PropertyType{"string"}, - Description: "The city and state, e.g. San Francisco, CA", - }, - }, - }, - }, - }, - } - - req := api.ChatRequest{ - Model: modelName, - Messages: []api.Message{ - { - Role: "user", - Content: "Call get_weather with location set to San Francisco.", - }, - }, - Tools: tools, - Options: map[string]any{ - "temperature": 0, - }, - } - - stallTimer := time.NewTimer(initialTimeout) - var gotToolCall bool - var lastToolCall api.ToolCall - - fn := func(response api.ChatResponse) error { - if len(response.Message.ToolCalls) > 0 { - gotToolCall = true - lastToolCall = response.Message.ToolCalls[len(response.Message.ToolCalls)-1] - } - if !stallTimer.Reset(streamTimeout) { - return fmt.Errorf("stall was detected while streaming response, aborting") - } - return nil - } - - stream := true - req.Stream = &stream - done := make(chan int) - var genErr error - go func() { - genErr = client.Chat(ctx, &req, fn) - done <- 0 - }() - - select { - case <-stallTimer.C: - t.Errorf("tool-calling chat never started. Timed out after: %s", initialTimeout.String()) - case <-done: - if genErr != nil { - t.Fatalf("chat failed: %v", genErr) - } - - if !gotToolCall { - t.Fatalf("expected at least one tool call, got none") - } - - if lastToolCall.Function.Name != "get_weather" { - t.Errorf("unexpected tool called: got %q want %q", lastToolCall.Function.Name, "get_weather") - } - - if _, ok := lastToolCall.Function.Arguments["location"]; !ok { - t.Errorf("expected tool arguments to include 'location', got: %s", lastToolCall.Function.Arguments.String()) - } - case <-ctx.Done(): - t.Error("outer test context done while waiting for tool-calling chat") - } -} diff --git a/integration/tools_test.go b/integration/tools_test.go new file mode 100644 index 0000000000..d6b8dfa54d --- /dev/null +++ b/integration/tools_test.go @@ -0,0 +1,132 @@ +//go:build integration + +package integration + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/ollama/ollama/api" +) + +func TestAPIToolCalling(t *testing.T) { + initialTimeout := 60 * time.Second + streamTimeout := 60 * time.Second + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) + defer cancel() + + client, _, cleanup := InitServerConnection(ctx, t) + defer cleanup() + + minVRAM := map[string]uint64{ + "qwen3-vl": 16, + "gpt-oss:20b": 16, + "gpt-oss:120b": 70, + "qwen3": 6, + "llama3.1": 8, + "llama3.2": 4, + "mistral": 6, + "qwen2.5": 6, + "qwen2": 6, + "mistral-nemo": 9, + "mistral-small": 16, + "mixtral:8x22b": 80, + "qwq": 20, + "granite3.3": 7, + } + + for _, model := range libraryToolsModels { + t.Run(model, func(t *testing.T) { + if v, ok := minVRAM[model]; ok { + skipUnderMinVRAM(t, v) + } + + if err := PullIfMissing(ctx, client, model); err != nil { + t.Fatalf("pull failed %s", err) + } + + tools := []api.Tool{ + { + Type: "function", + Function: api.ToolFunction{ + Name: "get_weather", + Description: "Get the current weather in a given location", + Parameters: api.ToolFunctionParameters{ + Type: "object", + Required: []string{"location"}, + Properties: map[string]api.ToolProperty{ + "location": { + Type: api.PropertyType{"string"}, + Description: "The city and state, e.g. San Francisco, CA", + }, + }, + }, + }, + }, + } + + req := api.ChatRequest{ + Model: model, + Messages: []api.Message{ + { + Role: "user", + Content: "Call get_weather with location set to San Francisco.", + }, + }, + Tools: tools, + Options: map[string]any{ + "temperature": 0, + }, + } + + stallTimer := time.NewTimer(initialTimeout) + var gotToolCall bool + var lastToolCall api.ToolCall + + fn := func(response api.ChatResponse) error { + if len(response.Message.ToolCalls) > 0 { + gotToolCall = true + lastToolCall = response.Message.ToolCalls[len(response.Message.ToolCalls)-1] + } + if !stallTimer.Reset(streamTimeout) { + return fmt.Errorf("stall was detected while streaming response, aborting") + } + return nil + } + + stream := true + req.Stream = &stream + done := make(chan int) + var genErr error + go func() { + genErr = client.Chat(ctx, &req, fn) + done <- 0 + }() + + select { + case <-stallTimer.C: + t.Errorf("tool-calling chat never started. Timed out after: %s", initialTimeout.String()) + case <-done: + if genErr != nil { + t.Fatalf("chat failed: %v", genErr) + } + + if !gotToolCall { + t.Fatalf("expected at least one tool call, got none") + } + + if lastToolCall.Function.Name != "get_weather" { + t.Errorf("unexpected tool called: got %q want %q", lastToolCall.Function.Name, "get_weather") + } + + if _, ok := lastToolCall.Function.Arguments["location"]; !ok { + t.Errorf("expected tool arguments to include 'location', got: %s", lastToolCall.Function.Arguments.String()) + } + case <-ctx.Done(): + t.Error("outer test context done while waiting for tool-calling chat") + } + }) + } +} diff --git a/integration/utils_test.go b/integration/utils_test.go index c0bac5e14f..8a362408e5 100644 --- a/integration/utils_test.go +++ b/integration/utils_test.go @@ -260,6 +260,22 @@ var ( "snowflake-arctic-embed", "snowflake-arctic-embed2", } + libraryToolsModels = []string{ + "qwen3-vl", + "gpt-oss:20b", + "gpt-oss:120b", + "qwen3", + "llama3.1", + "llama3.2", + "mistral", + "qwen2.5", + "qwen2", + "mistral-nemo", + "mistral-small", + "mixtral:8x22b", + "qwq", + "granite3.3", + } blueSkyPrompt = "why is the sky blue? Be brief but factual in your reply" blueSkyExpected = []string{"rayleigh", "scatter", "atmosphere", "nitrogen", "oxygen", "wavelength", "interact"}