diff --git a/integration/qwen3vl_test.go b/integration/qwen3vl_test.go index 9d8e8b898f..2b15c827af 100644 --- a/integration/qwen3vl_test.go +++ b/integration/qwen3vl_test.go @@ -12,17 +12,24 @@ import ( "github.com/ollama/ollama/api" ) -// var MODEL = "qwen3vl-odc-dev" -// var MODEL = "qwen3vl-thinking-odc-dev" +// TestQwen3VLStreaming tests Qwen3-VL with streaming enabled +func TestQwen3VLStreaming(t *testing.T) { + runQwen3VLTests(t, true) +} + +// TestQwen3VLNonStreaming tests Qwen3-VL with streaming disabled +func TestQwen3VLNonStreaming(t *testing.T) { + runQwen3VLTests(t, false) +} + +func runQwen3VLTests(t *testing.T, stream bool) { + models := []string{"qwen3vl-odc-dev"} // , "qwen3vl-thinking-odc-dev", "qwen3-vl:8b"} -// TestQwen3VLScenarios exercises common Qwen3-VL cases using integration helpers -func TestQwen3VLScenarios(t *testing.T) { tests := []struct { name string messages []api.Message tools []api.Tool images []string - anyResp []string }{ { name: "Text-Only Scenario", @@ -30,7 +37,6 @@ func TestQwen3VLScenarios(t *testing.T) { {Role: "system", Content: "You are a helpful assistant."}, {Role: "user", Content: "Write a short haiku about autumn."}, }, - // anyResp: []string{"haiku", "autumn", "fall"}, }, { name: "Single Image Scenario", @@ -45,7 +51,6 @@ func TestQwen3VLScenarios(t *testing.T) { }, }, images: []string{"testdata/question.png"}, - // anyResp: []string{"answer", "solution", "explanation"}, }, { name: "Multiple Images Scenario", @@ -56,11 +61,10 @@ func TestQwen3VLScenarios(t *testing.T) { }, { Role: "user", - Content: "What is the answer to these two questions?", + Content: "Use both images to answer the question.", }, }, - images: []string{"testdata/satmath1.png", "testdata/satmath2.png"}, - // anyResp: []string{"image", "answer", "analysis"}, + images: []string{"testdata/question.png", "testdata/menu.png"}, }, { name: "Tools Scenario", @@ -90,13 +94,71 @@ func TestQwen3VLScenarios(t *testing.T) { }, }, }, - // anyResp: []string{"san francisco", "weather", "temperature"}, + }, + { + name: "Multi-Turn Tools With Image", + messages: []api.Message{ + {Role: "system", Content: "Use tools when actions are required."}, + {Role: "user", Content: "What's the current temperature in San Francisco?"}, + {Role: "assistant", Content: "", ToolCalls: []api.ToolCall{ + {Function: api.ToolCallFunction{ + Name: "get_weather", + Arguments: api.ToolCallFunctionArguments{ + "city": "San Francisco", + }, + }}, + }}, + {Role: "tool", ToolName: "get_weather", Content: "Sunny"}, + {Role: "user", Content: "Given that weather, what are the top 10 activities to do in San Francisco? Consider this photo as context."}, + }, + tools: []api.Tool{ + { + Type: "function", + Function: api.ToolFunction{ + Name: "get_weather", + Description: "Get current weather for a city.", + Parameters: api.ToolFunctionParameters{ + Type: "object", + Properties: map[string]api.ToolProperty{ + "city": { + Type: api.PropertyType{"string"}, + Description: "The city to get the weather for", + }, + }, + Required: []string{"city"}, + }, + }, + }, + { + Type: "function", + Function: api.ToolFunction{ + Name: "get_top_10_activities", + Description: "Get the top 10 activities for a city given the weather.", + Parameters: api.ToolFunctionParameters{ + Type: "object", + Properties: map[string]api.ToolProperty{ + "weather": { + Type: api.PropertyType{"string"}, + Description: "The weather in the city", + }, + "city": { + Type: api.PropertyType{"string"}, + Description: "The city to get the activities for", + }, + "image": { + Type: api.PropertyType{"base64"}, + Description: "The image of the city", + }, + }, + Required: []string{"weather", "city", "image"}, + }, + }, + }, + }, + images: []string{"testdata/sf-city.jpeg"}, }, } - // models := []string{"qwen3-vl:8b", "qwen3-vl:30b"} - models := []string{"qwen3vl-odc-dev"} // , "qwen3vl-thinking-odc-dev"} - for _, model := range models { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -126,6 +188,8 @@ func TestQwen3VLScenarios(t *testing.T) { }, } + isRemote := os.Getenv("OLLAMA_TEST_EXISTING") != "" + // Use integration helpers ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) defer cancel() @@ -133,7 +197,7 @@ func TestQwen3VLScenarios(t *testing.T) { defer cleanup() // Skip pulling/preloading when using an existing (cloud) server - if os.Getenv("OLLAMA_TEST_EXISTING") == "" { + if !isRemote { if err := PullIfMissing(ctx, client, req.Model); err != nil { t.Fatal(err) } @@ -141,29 +205,53 @@ func TestQwen3VLScenarios(t *testing.T) { _ = client.Generate(ctx, &api.GenerateRequest{Model: req.Model}, func(r api.GenerateResponse) error { return nil }) } - // If this is a tools scenario, validate tool_calls instead of content - if len(tt.tools) > 0 { - var gotCalls []api.ToolCall - err := client.Chat(ctx, &req, func(r api.ChatResponse) error { - if len(r.Message.ToolCalls) > 0 { - gotCalls = append(gotCalls, r.Message.ToolCalls...) - } - return nil - }) - if err != nil { - t.Fatalf("chat error: %v", err) + var contentBuf, thinkingBuf strings.Builder + var gotCalls []api.ToolCall + + err := client.Chat(ctx, &req, func(r api.ChatResponse) error { + contentBuf.WriteString(r.Message.Content) + thinkingBuf.WriteString(r.Message.Thinking) + if len(r.Message.ToolCalls) > 0 { + gotCalls = append(gotCalls, r.Message.ToolCalls...) } + return nil + }) + if err != nil { + t.Fatalf("chat error: %v", err) + } + + // Log responses (truncated) + content := contentBuf.String() + thinking := thinkingBuf.String() + const maxLog = 800 + if len(thinking) > 0 { + if len(thinking) > maxLog { + thinking = thinking[:maxLog] + "... [truncated]" + } + t.Logf("Thinking: %s", thinking) + } + if len(content) > 0 { + if len(content) > maxLog { + content = content[:maxLog] + "... [truncated]" + } + t.Logf("Content: %s", content) + } + if len(gotCalls) > 0 { + t.Logf("Tool calls: %d", len(gotCalls)) + for i, call := range gotCalls { + t.Logf(" [%d] %s(%+v)", i, call.Function.Name, call.Function.Arguments) + } + } + + // If this is a tools scenario, validate tool_calls + if len(tt.tools) > 0 { if len(gotCalls) == 0 { t.Fatalf("expected at least one tool call, got none") } if gotCalls[0].Function.Name == "" { t.Fatalf("tool call missing function name: %#v", gotCalls[0]) } - return } - - // Otherwise, validate content contains any of the expected substrings - DoChat(ctx, t, client, req, toLowerSlice(tt.anyResp), 240*time.Second, 30*time.Second) }) } } @@ -177,11 +265,3 @@ func loadImageData(t *testing.T, imagePath string) []byte { } return data } - -func toLowerSlice(in []string) []string { - out := make([]string, len(in)) - for i, s := range in { - out[i] = strings.ToLower(s) - } - return out -}