diff --git a/integration/api_test.go b/integration/api_test.go
index 39eea39c06..5d7acd9495 100644
--- a/integration/api_test.go
+++ b/integration/api_test.go
@@ -381,3 +381,30 @@ func TestAPIShowModel(t *testing.T) {
 		t.Errorf("%s missing modified_at: %#v", modelName, resp)
 	}
 }
+
+// TestAPIEmbeddings calls PullIfMissing for the first entry of
+// libraryEmbedModels, requests an embedding for a fixed prompt, and fails if
+// either call errors or the returned embedding is empty.
+func TestAPIEmbeddings(t *testing.T) {
+	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
+	defer cancel()
+	client, _, cleanup := InitServerConnection(ctx, t)
+	defer cleanup()
+	req := api.EmbeddingRequest{
+		Model:   libraryEmbedModels[0],
+		Prompt:  "why is the sky blue?",
+		Options: map[string]any{"temperature": 0, "seed": 123},
+	}
+
+	if err := PullIfMissing(ctx, client, req.Model); err != nil {
+		t.Fatalf("pull failed %s", err)
+	}
+
+	resp, err := client.Embeddings(ctx, &req)
+	if err != nil {
+		t.Fatalf("embeddings call failed %s", err)
+	}
+	if len(resp.Embedding) == 0 {
+		t.Errorf("zero length embedding response")
+	}
+}
diff --git a/integration/qwen3vl_test.go b/integration/qwen3vl_test.go
index 1333eabf76..f9dc25dcee 100644
--- a/integration/qwen3vl_test.go
+++ b/integration/qwen3vl_test.go
@@ -12,18 +12,22 @@ import (
 	"github.com/ollama/ollama/api"
 )
 
-// TestQwen3VLStreaming tests Qwen3-VL with streaming enabled
-func TestQwen3VLStreaming(t *testing.T) {
-	runQwen3VLTests(t, true)
+// getTestConfig returns the model name and streaming mode for the Qwen3-VL
+// tests. QWEN3VL_MODEL selects the model (default "qwen3-vl:235b-cloud");
+// streaming stays on unless QWEN3VL_STREAM is "false" in any letter case.
+func getTestConfig() (model string, stream bool) {
+	model = os.Getenv("QWEN3VL_MODEL")
+	if model == "" {
+		model = "qwen3-vl:235b-cloud" // default
+	}
+	// Unset, empty, or unrecognized values leave streaming enabled.
+	stream = !strings.EqualFold(os.Getenv("QWEN3VL_STREAM"), "false")
+	return model, stream
 }
 
-// TestQwen3VLNonStreaming tests Qwen3-VL with streaming disabled
-func TestQwen3VLNonStreaming(t *testing.T) {
-	runQwen3VLTests(t, false)
-}
+func TestQwen3VL(t *testing.T) {
+	model, stream := getTestConfig()
 
-func runQwen3VLTests(t *testing.T, stream bool) {
-	models := []string{"qwen3-vl:235b-cloud", "qwen3-vl:235b-instruct-cloud"}
 	tests := []struct {
 		name     string
 		messages []api.Message
@@ -46,10 +50,10 @@ func runQwen3VLTests(t *testing.T, stream bool) {
 				},
 				{
 					Role:    "user",
-					Content: "What is the answer to this question?",
+					Content: "What is in this image?",
 				},
 			},
-			images: []string{"testdata/question.png"},
+			images: []string{"testdata/menu.png"},
 		},
 		{
 			name: "Multiple Images Scenario",
@@ -63,7 +67,7 @@ func runQwen3VLTests(t *testing.T, stream bool) {
 					Content: "Use both images to answer the question.",
 				},
 			},
-			images: []string{"testdata/question.png", "testdata/menu.png"},
+			images: []string{"testdata/satmath1.png", "testdata/satmath2.png"},
 		},
 		{
 			name: "Tools Scenario",
@@ -158,101 +162,89 @@ func runQwen3VLTests(t *testing.T, stream bool) {
 		},
 	}
 
-	for _, model := range models {
-		for _, tt := range tests {
-			t.Run(tt.name, func(t *testing.T) {
-				// Load and attach images if specified
-				if len(tt.images) > 0 {
-					var imgs []api.ImageData
-					for _, path := range tt.images {
-						imgs = append(imgs, loadImageData(t, path))
-					}
-					if len(tt.messages) > 0 {
-						lastMessage := &tt.messages[len(tt.messages)-1]
-						if lastMessage.Role == "user" {
-							lastMessage.Images = imgs
-						}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// Load and attach images to last user message
+			messages := tt.messages
+			if len(tt.images) > 0 {
+				var imgs []api.ImageData
+				for _, path := range tt.images {
+					imgs = append(imgs, loadImageData(t, path))
+				}
+				// Find last user message and attach images
+				for i := len(messages) - 1; i >= 0; i-- {
+					if messages[i].Role == "user" {
+						messages[i].Images = imgs
+						break
 					}
 				}
+			}
 
-				// Build chat request
-				req := api.ChatRequest{
-					Model:    model,
-					Messages: tt.messages,
-					Tools:    tt.tools,
-					Stream:   &stream,
-					Options: map[string]any{
-						"seed":        42,
-						"temperature": 0.0,
-					},
+			ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
+			defer cancel()
+			client, _, cleanup := InitServerConnection(ctx, t)
+			defer cleanup()
+
+			// Pull/preload model if not using remote server
+			if os.Getenv("OLLAMA_TEST_EXISTING") == "" {
+				if err := PullIfMissing(ctx, client, model); err != nil {
+					t.Fatal(err)
 				}
+				// Preload to reduce startup latency
+				_ = client.Generate(ctx, &api.GenerateRequest{Model: model}, func(api.GenerateResponse) error { return nil })
+			}
 
-				isRemote := os.Getenv("OLLAMA_TEST_EXISTING") != ""
+			// Build and execute chat request
+			req := &api.ChatRequest{
+				Model:    model,
+				Messages: messages,
+				Tools:    tt.tools,
+				Stream:   &stream,
+				Options:  map[string]any{"seed": 42, "temperature": 0.0},
+			}
 
-				// Use integration helpers
-				ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
-				defer cancel()
-				client, _, cleanup := InitServerConnection(ctx, t)
-				defer cleanup()
+			var contentBuf, thinkingBuf strings.Builder
+			var toolCalls []api.ToolCall
 
-				// Skip pulling/preloading when using an existing (cloud) server
-				if !isRemote {
-					if err := PullIfMissing(ctx, client, req.Model); err != nil {
-						t.Fatal(err)
-					}
-					// Preload model once to reduce startup latency
-					_ = client.Generate(ctx, &api.GenerateRequest{Model: req.Model}, func(r api.GenerateResponse) error { return nil })
-				}
-
-				var contentBuf, thinkingBuf strings.Builder
-				var gotCalls []api.ToolCall
-
-				err := client.Chat(ctx, &req, func(r api.ChatResponse) error {
-					contentBuf.WriteString(r.Message.Content)
-					thinkingBuf.WriteString(r.Message.Thinking)
-					if len(r.Message.ToolCalls) > 0 {
-						gotCalls = append(gotCalls, r.Message.ToolCalls...)
-					}
-					return nil
-				})
-				if err != nil {
-					t.Fatalf("chat error: %v", err)
-				}
-
-				// Log responses (truncated)
-				content := contentBuf.String()
-				thinking := thinkingBuf.String()
-				const maxLog = 800
-				if len(thinking) > 0 {
-					if len(thinking) > maxLog {
-						thinking = thinking[:maxLog] + "... [truncated]"
-					}
-					t.Logf("Thinking: %s", thinking)
-				}
-				if len(content) > 0 {
-					if len(content) > maxLog {
-						content = content[:maxLog] + "... [truncated]"
-					}
-					t.Logf("Content: %s", content)
-				}
-				if len(gotCalls) > 0 {
-					t.Logf("Tool calls: %d", len(gotCalls))
-					for i, call := range gotCalls {
-						t.Logf("  [%d] %s(%+v)", i, call.Function.Name, call.Function.Arguments)
-					}
-				}
-
-				// If this is a tools scenario, validate tool_calls
-				if len(tt.tools) > 0 {
-					if len(gotCalls) == 0 {
-						t.Fatalf("expected at least one tool call, got none")
-					}
-					if gotCalls[0].Function.Name == "" {
-						t.Fatalf("tool call missing function name: %#v", gotCalls[0])
-					}
-				}
+			err := client.Chat(ctx, req, func(r api.ChatResponse) error {
+				contentBuf.WriteString(r.Message.Content)
+				thinkingBuf.WriteString(r.Message.Thinking)
+				toolCalls = append(toolCalls, r.Message.ToolCalls...)
+				return nil
 			})
-		}
+			if err != nil {
+				t.Fatalf("chat error: %v", err)
+			}
+
+			// Log responses, truncated to 800 bytes without splitting a UTF-8 rune.
+			logTruncated := func(label, text string) {
+				if text != "" {
+					if len(text) > 800 {
+						text = strings.ToValidUTF8(text[:800], "") + "... [truncated]"
+					}
+					t.Logf("%s: %s", label, text)
+				}
+			}
+			logTruncated("Thinking", thinkingBuf.String())
+			logTruncated("Content", contentBuf.String())
+
+			if len(toolCalls) > 0 {
+				t.Logf("Tool calls: %d", len(toolCalls))
+				for i, call := range toolCalls {
+					t.Logf("  [%d] %s(%+v)", i, call.Function.Name, call.Function.Arguments)
+				}
+			}
+
+			// Validate tool calls if tools were provided
+			if len(tt.tools) > 0 {
+				if len(toolCalls) == 0 {
+					t.Fatal("expected at least one tool call, got none")
+				}
+				if toolCalls[0].Function.Name == "" {
+					t.Fatalf("tool call missing function name: %#v", toolCalls[0])
+				}
+			}
+		})
 	}
 }
 
diff --git a/integration/testdata/question.png b/integration/testdata/question.png
deleted file mode 100644
index 2ff10c2578..0000000000
Binary files a/integration/testdata/question.png and /dev/null differ
diff --git a/integration/testdata/sketch2code_input.jpeg b/integration/testdata/sketch2code_input.jpeg
deleted file mode 100644
index 2bcce82dc9..0000000000
Binary files a/integration/testdata/sketch2code_input.jpeg and /dev/null differ