package server

import (
	"bytes"
	"encoding/json"
	"net/http"
	"testing"
	"time"

	"github.com/gin-gonic/gin"

	"github.com/ollama/ollama/api"
	"github.com/ollama/ollama/discover"
	"github.com/ollama/ollama/fs/ggml"
	"github.com/ollama/ollama/llm"
)

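// TestGenerateDebugRenderOnly verifies that GenerateHandler, when DebugRenderOnly is set,
// returns the rendered prompt template (and image count) instead of running a completion.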
func TestGenerateDebugRenderOnly(t *testing.T) {
	gin.SetMode(gin.TestMode)

	mock := mockRunner{
		CompletionResponse: llm.CompletionResponse{
			Done:               true,
			DoneReason:         llm.DoneReasonStop,
			PromptEvalCount:    1,
			PromptEvalDuration: 1,
			EvalCount:          1,
			EvalDuration:       1,
		},
	}

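	// Scheduler backed by the mock runner: loadFn hands back a runnerRef wrapping the mock,
	// so no real model is loaded during the test.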
	s := Server{
		sched: &Scheduler{
			pendingReqCh:  make(chan *LlmRequest, 1),
			finishedReqCh: make(chan *LlmRequest, 1),
			expiredCh:     make(chan *runnerRef, 1),
			unloadedCh:    make(chan any, 1),
			loaded:        make(map[string]*runnerRef),
			newServerFn:   newMockServer(&mock),
			getGpuFn:      discover.GetGPUInfo,
			getCpuFn:      discover.GetCPUInfo,
			reschedDelay:  250 * time.Millisecond,
			loadFn: func(req *LlmRequest, _ *ggml.GGML, _ discover.GpuInfoList, _ bool) bool {
				// add small delay to simulate loading
				time.Sleep(time.Millisecond)
				req.successCh <- &runnerRef{
					llama: &mock,
				}
				return false
			},
		},
	}

	go s.sched.Run(t.Context())

	// Create a test model
	stream := false
	_, digest := createBinFile(t, ggml.KV{
		"general.architecture":          "llama",
		"llama.block_count":             uint32(1),
		"llama.context_length":          uint32(8192),
		"llama.embedding_length":        uint32(4096),
		"llama.attention.head_count":    uint32(32),
		"llama.attention.head_count_kv": uint32(8),
		"tokenizer.ggml.tokens":         []string{""},
		"tokenizer.ggml.scores":         []float32{0},
		"tokenizer.ggml.token_type":     []int32{0},
	}, []*ggml.Tensor{
		{Name: "token_embd.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
		{Name: "blk.0.attn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
		{Name: "blk.0.ffn_down.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
		{Name: "blk.0.ffn_gate.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
		{Name: "blk.0.ffn_up.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
		{Name: "blk.0.ffn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
		{Name: "blk.0.attn_k.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
		{Name: "blk.0.attn_output.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
		{Name: "blk.0.attn_q.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
		{Name: "blk.0.attn_v.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
		{Name: "output.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
	})

	w := createRequest(t, s.CreateHandler, api.CreateRequest{
		Model:    "test-model",
		Files:    map[string]string{"file.gguf": digest},
		Template: "{{ .Prompt }}",
		Stream:   &stream,
	})

	if w.Code != http.StatusOK {
		t.Fatalf("expected status 200, got %d", w.Code)
	}

	tests := []struct {
		name            string
		request         api.GenerateRequest
		expectDebug     bool
		expectTemplate  string
		expectNumImages int
	}{
		{
			name: "debug render only enabled",
			request: api.GenerateRequest{
				Model:           "test-model",
				Prompt:          "Hello, world!",
				DebugRenderOnly: true,
			},
			expectDebug:    true,
			expectTemplate: "Hello, world!",
		},
		{
			name: "debug render only disabled",
			request: api.GenerateRequest{
				Model:           "test-model",
				Prompt:          "Hello, world!",
				DebugRenderOnly: false,
			},
			expectDebug: false,
		},
		{
			name: "debug render only with system prompt",
			request: api.GenerateRequest{
				Model:           "test-model",
				Prompt:          "User question",
				System:          "You are a helpful assistant",
				DebugRenderOnly: true,
			},
			expectDebug:    true,
			expectTemplate: "User question",
		},
		{
			name: "debug render only with template",
			request: api.GenerateRequest{
				Model:           "test-model",
				Prompt:          "Hello",
				Template:        "PROMPT: {{ .Prompt }}",
				DebugRenderOnly: true,
			},
			expectDebug:    true,
			expectTemplate: "PROMPT: Hello",
		},
		{
			name: "debug render only with images",
			request: api.GenerateRequest{
				Model:           "test-model",
				Prompt:          "Describe this image",
				Images:          []api.ImageData{[]byte("fake-image-data")},
				DebugRenderOnly: true,
			},
			expectDebug:     true,
			expectTemplate:  "[img-0]\n\nDescribe this image",
			expectNumImages: 1,
		},
		{
			name: "debug render only with raw mode",
			request: api.GenerateRequest{
				Model:           "test-model",
				Prompt:          "Raw prompt text",
				Raw:             true,
				DebugRenderOnly: true,
			},
			expectDebug:    true,
			expectTemplate: "Raw prompt text",
		},
	}

	for _, tt := range tests {
		// Test both with and without streaming
		streamValues := []bool{false, true}
		for _, stream := range streamValues {
			streamSuffix := ""
			if stream {
				streamSuffix = " (streaming)"
			}
			t.Run(tt.name+streamSuffix, func(t *testing.T) {
				req := tt.request
				req.Stream = &stream
				w := createRequest(t, s.GenerateHandler, req)

				if tt.expectDebug {
					if w.Code != http.StatusOK {
						t.Errorf("expected status %d, got %d, body: %s", http.StatusOK, w.Code, w.Body.String())
					}

					var response api.DebugTemplateResponse
					if err := json.Unmarshal(w.Body.Bytes(), &response); err != nil {
						t.Fatalf("failed to unmarshal response: %v", err)
					}

					if response.Model != tt.request.Model {
						t.Errorf("expected model %s, got %s", tt.request.Model, response.Model)
					}

					if tt.expectTemplate != "" && response.DebugInfo.RenderedTemplate != tt.expectTemplate {
						t.Errorf("expected template %q, got %q", tt.expectTemplate, response.DebugInfo.RenderedTemplate)
					}

					if tt.expectNumImages > 0 && response.DebugInfo.ImageCount != tt.expectNumImages {
						t.Errorf("expected image count %d, got %d", tt.expectNumImages, response.DebugInfo.ImageCount)
					}
				} else {
					// When debug is disabled, it should attempt normal processing
					if w.Code != http.StatusOK {
						t.Errorf("expected status %d, got %d", http.StatusOK, w.Code)
					}
				}
			})
		}
	}
}

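// TestChatDebugRenderOnly verifies the same DebugRenderOnly behaviour for ChatHandler,
// covering system/assistant messages, images, and tool definitions in the rendered template.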
func TestChatDebugRenderOnly(t *testing.T) {
	gin.SetMode(gin.TestMode)

	mock := mockRunner{
		CompletionResponse: llm.CompletionResponse{
			Done:               true,
			DoneReason:         llm.DoneReasonStop,
			PromptEvalCount:    1,
			PromptEvalDuration: 1,
			EvalCount:          1,
			EvalDuration:       1,
		},
	}

	s := Server{
		sched: &Scheduler{
			pendingReqCh:  make(chan *LlmRequest, 1),
			finishedReqCh: make(chan *LlmRequest, 1),
			expiredCh:     make(chan *runnerRef, 1),
			unloadedCh:    make(chan any, 1),
			loaded:        make(map[string]*runnerRef),
			newServerFn:   newMockServer(&mock),
			getGpuFn:      discover.GetGPUInfo,
			getCpuFn:      discover.GetCPUInfo,
			reschedDelay:  250 * time.Millisecond,
			loadFn: func(req *LlmRequest, _ *ggml.GGML, _ discover.GpuInfoList, _ bool) bool {
				// add small delay to simulate loading
				time.Sleep(time.Millisecond)
				req.successCh <- &runnerRef{
					llama: &mock,
				}
				return false
			},
		},
	}

	go s.sched.Run(t.Context())

	// Create a test model
	stream := false
	_, digest := createBinFile(t, ggml.KV{
		"general.architecture":          "llama",
		"llama.block_count":             uint32(1),
		"llama.context_length":          uint32(8192),
		"llama.embedding_length":        uint32(4096),
		"llama.attention.head_count":    uint32(32),
		"llama.attention.head_count_kv": uint32(8),
		"tokenizer.ggml.tokens":         []string{""},
		"tokenizer.ggml.scores":         []float32{0},
		"tokenizer.ggml.token_type":     []int32{0},
	}, []*ggml.Tensor{
		{Name: "token_embd.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
		{Name: "blk.0.attn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
		{Name: "blk.0.ffn_down.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
		{Name: "blk.0.ffn_gate.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
		{Name: "blk.0.ffn_up.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
		{Name: "blk.0.ffn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
		{Name: "blk.0.attn_k.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
		{Name: "blk.0.attn_output.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
		{Name: "blk.0.attn_q.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
		{Name: "blk.0.attn_v.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
		{Name: "output.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
	})

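	// The test template renders each message as "role: content\n" and, when tools are
	// supplied, prepends the tools rendered as JSON; the expected strings below follow from that.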
	w := createRequest(t, s.CreateHandler, api.CreateRequest{
		Model:    "test-model",
		Files:    map[string]string{"file.gguf": digest},
		Template: "{{ if .Tools }}{{ .Tools }}{{ end }}{{ range .Messages }}{{ .Role }}: {{ .Content }}\n{{ end }}",
		Stream:   &stream,
	})

	if w.Code != http.StatusOK {
		t.Fatalf("expected status 200, got %d", w.Code)
	}

	tests := []struct {
		name            string
		request         api.ChatRequest
		expectDebug     bool
		expectTemplate  string
		expectNumImages int
	}{
		{
			name: "chat debug render only enabled",
			request: api.ChatRequest{
				Model: "test-model",
				Messages: []api.Message{
					{Role: "system", Content: "You are a helpful assistant"},
					{Role: "user", Content: "Hello"},
				},
				DebugRenderOnly: true,
			},
			expectDebug:    true,
			expectTemplate: "system: You are a helpful assistant\nuser: Hello\n",
		},
		{
			name: "chat debug render only disabled",
			request: api.ChatRequest{
				Model: "test-model",
				Messages: []api.Message{
					{Role: "user", Content: "Hello"},
				},
				DebugRenderOnly: false,
			},
			expectDebug: false,
		},
		{
			name: "chat debug with assistant message",
			request: api.ChatRequest{
				Model: "test-model",
				Messages: []api.Message{
					{Role: "user", Content: "Hello"},
					{Role: "assistant", Content: "Hi there!"},
					{Role: "user", Content: "How are you?"},
				},
				DebugRenderOnly: true,
			},
			expectDebug:    true,
			expectTemplate: "user: Hello\nassistant: Hi there!\nuser: How are you?\n",
		},
		{
			name: "chat debug with images",
			request: api.ChatRequest{
				Model: "test-model",
				Messages: []api.Message{
					{
						Role:    "user",
						Content: "What's in this image?",
						Images:  []api.ImageData{[]byte("fake-image-data")},
					},
				},
				DebugRenderOnly: true,
			},
			expectDebug:     true,
			expectTemplate:  "user: [img-0]What's in this image?\n",
			expectNumImages: 1,
		},
		{
			name: "chat debug with tools",
			request: api.ChatRequest{
				Model: "test-model",
				Messages: []api.Message{
					{Role: "user", Content: "Get the weather"},
				},
				Tools: api.Tools{
					{
						Type: "function",
						Function: api.ToolFunction{
							Name:        "get_weather",
							Description: "Get weather information",
						},
					},
				},
				DebugRenderOnly: true,
			},
			expectDebug:    true,
			expectTemplate: "[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get weather information\",\"parameters\":{\"type\":\"\",\"required\":null,\"properties\":null}}}]user: Get the weather\n",
		},
	}

	for _, tt := range tests {
		// Test both with and without streaming
		streamValues := []bool{false, true}
		for _, stream := range streamValues {
			streamSuffix := ""
			if stream {
				streamSuffix = " (streaming)"
			}
			t.Run(tt.name+streamSuffix, func(t *testing.T) {
				req := tt.request
				req.Stream = &stream
				w := createRequest(t, s.ChatHandler, req)

				if tt.expectDebug {
					if w.Code != http.StatusOK {
						t.Errorf("expected status %d, got %d, body: %s", http.StatusOK, w.Code, w.Body.String())
					}

					var response api.DebugTemplateResponse
					if err := json.Unmarshal(w.Body.Bytes(), &response); err != nil {
						t.Fatalf("failed to unmarshal response: %v", err)
					}

					if response.Model != tt.request.Model {
						t.Errorf("expected model %s, got %s", tt.request.Model, response.Model)
					}

					if tt.expectTemplate != "" && response.DebugInfo.RenderedTemplate != tt.expectTemplate {
						t.Errorf("expected template %q, got %q", tt.expectTemplate, response.DebugInfo.RenderedTemplate)
					}

					if tt.expectNumImages > 0 && response.DebugInfo.ImageCount != tt.expectNumImages {
						t.Errorf("expected image count %d, got %d", tt.expectNumImages, response.DebugInfo.ImageCount)
					}
				} else {
					// When debug is disabled, it should attempt normal processing
					if w.Code != http.StatusOK {
						t.Errorf("expected status %d, got %d", http.StatusOK, w.Code)
					}
				}
			})
		}
	}
}