mirror of
https://github.com/ollama/ollama.git
synced 2025-08-27 18:01:18 +02:00
Only load supported models on new engine (#11362)
* Only load supported models on new engine Verify the model is supported before trying to load * int: testcase for all library models
This commit is contained in:
57
integration/library_models_test.go
Normal file
57
integration/library_models_test.go
Normal file
@@ -0,0 +1,57 @@
|
||||
//go:build integration && library
|
||||
|
||||
package integration
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
// First run of this scenario on a target system will take a long time to download
|
||||
// ~1.5TB of models. Set a sufficiently large -timeout for your network speed
|
||||
func TestLibraryModelsGenerate(t *testing.T) {
|
||||
softTimeout, hardTimeout := getTimeouts(t)
|
||||
slog.Info("Setting timeouts", "soft", softTimeout, "hard", hardTimeout)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), hardTimeout)
|
||||
defer cancel()
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
|
||||
chatModels := libraryChatModels
|
||||
for _, model := range chatModels {
|
||||
t.Run(model, func(t *testing.T) {
|
||||
if time.Now().Sub(started) > softTimeout {
|
||||
t.Skip("skipping remaining tests to avoid excessive runtime")
|
||||
}
|
||||
if err := PullIfMissing(ctx, client, model); err != nil {
|
||||
t.Fatalf("pull failed %s", err)
|
||||
}
|
||||
req := api.GenerateRequest{
|
||||
Model: model,
|
||||
Prompt: "why is the sky blue?",
|
||||
KeepAlive: &api.Duration{Duration: 10 * time.Second},
|
||||
Options: map[string]interface{}{
|
||||
"temperature": 0.1,
|
||||
"seed": 123,
|
||||
},
|
||||
}
|
||||
anyResp := []string{"rayleigh", "scatter", "atmosphere", "nitrogen", "oxygen", "wavelength"}
|
||||
// Special cases
|
||||
if model == "duckdb-nsql" {
|
||||
anyResp = []string{"select", "from"}
|
||||
} else if model == "granite3-guardian" || model == "shieldgemma" || model == "llama-guard3" || model == "bespoke-minicheck" {
|
||||
anyResp = []string{"yes", "no", "safe", "unsafe"}
|
||||
} else if model == "openthinker" || model == "nexusraven" {
|
||||
anyResp = []string{"plugin", "im_sep", "components", "function call"}
|
||||
} else if model == "starcoder" || model == "starcoder2" || model == "magicoder" || model == "deepseek-coder" {
|
||||
req.Prompt = "def fibonacci():"
|
||||
anyResp = []string{"f(n)", "sequence", "n-1", "main()", "__main__", "while"}
|
||||
}
|
||||
DoGenerate(ctx, t, client, req, anyResp, 120*time.Second, 30*time.Second)
|
||||
})
|
||||
}
|
||||
}
|
@@ -72,6 +72,187 @@ var (
|
||||
"stablelm2:latest", // Predictions are off, crashes on small VRAM GPUs
|
||||
"falcon:latest",
|
||||
}
|
||||
|
||||
// Some library models are quite large - ensure large VRAM and sufficient disk space
|
||||
// before running scenarios based on this set
|
||||
libraryChatModels = []string{
|
||||
"alfred",
|
||||
"athene-v2",
|
||||
"aya-expanse",
|
||||
"aya",
|
||||
"bakllava",
|
||||
"bespoke-minicheck",
|
||||
"codebooga",
|
||||
"codegeex4",
|
||||
"codegemma",
|
||||
"codellama",
|
||||
"codeqwen",
|
||||
"codestral",
|
||||
"codeup",
|
||||
"cogito",
|
||||
"command-a",
|
||||
"command-r-plus",
|
||||
"command-r",
|
||||
"command-r7b-arabic",
|
||||
"command-r7b",
|
||||
"dbrx",
|
||||
"deepcoder",
|
||||
"deepscaler",
|
||||
"deepseek-coder-v2",
|
||||
"deepseek-coder",
|
||||
"deepseek-llm",
|
||||
"deepseek-r1",
|
||||
// "deepseek-v2.5", // requires 155 GB VRAM
|
||||
"deepseek-v2",
|
||||
// "deepseek-v3", // requires 482 GB VRAM
|
||||
"devstral",
|
||||
"dolphin-llama3",
|
||||
"dolphin-mistral",
|
||||
"dolphin-mixtral",
|
||||
"dolphin-phi",
|
||||
"dolphin3",
|
||||
"dolphincoder",
|
||||
"duckdb-nsql",
|
||||
"everythinglm",
|
||||
"exaone-deep",
|
||||
"exaone3.5",
|
||||
"falcon",
|
||||
"falcon2",
|
||||
"falcon3",
|
||||
"firefunction-v2",
|
||||
"gemma",
|
||||
"gemma2",
|
||||
"gemma3",
|
||||
"gemma3n",
|
||||
"glm4",
|
||||
"goliath",
|
||||
"granite-code",
|
||||
"granite3-dense",
|
||||
"granite3-guardian",
|
||||
"granite3-moe",
|
||||
"granite3.1-dense",
|
||||
"granite3.1-moe",
|
||||
"granite3.2-vision",
|
||||
"granite3.2",
|
||||
"granite3.3",
|
||||
"hermes3",
|
||||
"internlm2",
|
||||
"llama-guard3",
|
||||
"llama-pro",
|
||||
"llama2-chinese",
|
||||
"llama2-uncensored",
|
||||
"llama2",
|
||||
"llama3-chatqa",
|
||||
"llama3-gradient",
|
||||
"llama3-groq-tool-use",
|
||||
"llama3.1",
|
||||
"llama3.2-vision",
|
||||
"llama3.2",
|
||||
"llama3.3",
|
||||
"llama3",
|
||||
"llama4",
|
||||
"llava-llama3",
|
||||
"llava-phi3",
|
||||
"llava",
|
||||
"magicoder",
|
||||
"magistral",
|
||||
"marco-o1",
|
||||
"mathstral",
|
||||
"meditron",
|
||||
"medllama2",
|
||||
"megadolphin",
|
||||
"minicpm-v",
|
||||
"mistral-large",
|
||||
"mistral-nemo",
|
||||
"mistral-openorca",
|
||||
"mistral-small",
|
||||
"mistral-small3.1",
|
||||
"mistral-small3.2",
|
||||
"mistral",
|
||||
"mistrallite",
|
||||
"mixtral",
|
||||
"moondream",
|
||||
"nemotron-mini",
|
||||
"nemotron",
|
||||
"neural-chat",
|
||||
"nexusraven",
|
||||
"notus",
|
||||
"nous-hermes",
|
||||
"nous-hermes2-mixtral",
|
||||
"nous-hermes2",
|
||||
"nuextract",
|
||||
"olmo2",
|
||||
"open-orca-platypus2",
|
||||
"openchat",
|
||||
"opencoder",
|
||||
"openhermes",
|
||||
"openthinker",
|
||||
"orca-mini",
|
||||
"orca2",
|
||||
// "phi", // unreliable
|
||||
"phi3.5",
|
||||
"phi3",
|
||||
"phi4-mini-reasoning",
|
||||
"phi4-mini",
|
||||
"phi4-reasoning",
|
||||
"phi4",
|
||||
"phind-codellama",
|
||||
"qwen",
|
||||
"qwen2-math",
|
||||
"qwen2.5-coder",
|
||||
"qwen2.5",
|
||||
"qwen2.5vl",
|
||||
"qwen2",
|
||||
"qwen3:0.6b", // dense
|
||||
"qwen3:30b", // MOE
|
||||
"qwq",
|
||||
"r1-1776",
|
||||
"reader-lm",
|
||||
"reflection",
|
||||
"sailor2",
|
||||
"samantha-mistral",
|
||||
"shieldgemma",
|
||||
"smallthinker",
|
||||
"smollm",
|
||||
"smollm2",
|
||||
"solar-pro",
|
||||
"solar",
|
||||
"sqlcoder",
|
||||
"stable-beluga",
|
||||
"stable-code",
|
||||
"stablelm-zephyr",
|
||||
"stablelm2",
|
||||
"starcoder",
|
||||
"starcoder2",
|
||||
"starling-lm",
|
||||
"tinydolphin",
|
||||
"tinyllama",
|
||||
"tulu3",
|
||||
"vicuna",
|
||||
"wizard-math",
|
||||
"wizard-vicuna-uncensored",
|
||||
"wizard-vicuna",
|
||||
"wizardcoder",
|
||||
"wizardlm-uncensored",
|
||||
"wizardlm2",
|
||||
"xwinlm",
|
||||
"yarn-llama2",
|
||||
"yarn-mistral",
|
||||
"yi-coder",
|
||||
"yi",
|
||||
"zephyr",
|
||||
}
|
||||
libraryEmbedModels = []string{
|
||||
"all-minilm",
|
||||
"bge-large",
|
||||
"bge-m3",
|
||||
"granite-embedding",
|
||||
"mxbai-embed-large",
|
||||
"nomic-embed-text",
|
||||
"paraphrase-multilingual",
|
||||
"snowflake-arctic-embed",
|
||||
"snowflake-arctic-embed2",
|
||||
}
|
||||
)
|
||||
|
||||
func Init() {
|
||||
@@ -313,6 +494,10 @@ func DoGenerate(ctx context.Context, t *testing.T, client *api.Client, genReq ap
|
||||
t.Errorf("generate stalled. Response so far:%s", buf.String())
|
||||
}
|
||||
case <-done:
|
||||
if genErr != nil && strings.Contains(genErr.Error(), "model requires more system memory") {
|
||||
slog.Warn("model is too large for the target test system", "model", genReq.Model, "error", genErr)
|
||||
return
|
||||
}
|
||||
require.NoError(t, genErr, "failed with %s request prompt %s ", genReq.Model, genReq.Prompt)
|
||||
// Verify the response contains the expected data
|
||||
response := buf.String()
|
||||
|
Reference in New Issue
Block a user