From 86a622cbdc69e9fd501764ff7565e977fc98f00a Mon Sep 17 00:00:00 2001 From: Patrick Devine Date: Tue, 31 Dec 2024 18:02:30 -0800 Subject: [PATCH] Update the /api/create endpoint to use JSON (#7935) Replaces `POST /api/create` to use JSON instead of a Modelfile. This is a breaking change. --- api/types.go | 17 +- cmd/cmd.go | 233 ++---------- cmd/interactive.go | 43 +-- cmd/interactive_test.go | 56 --- parser/expandpath_test.go | 57 +++ parser/parser.go | 271 +++++++++++++- parser/parser_test.go | 153 ++++++++ server/create.go | 667 +++++++++++++++++++++++++++++++++ server/images.go | 327 +--------------- server/model.go | 145 ------- server/model_test.go | 84 ----- server/routes.go | 72 ---- server/routes_create_test.go | 238 ++++++++---- server/routes_delete_test.go | 11 +- server/routes_generate_test.go | 147 ++++---- server/routes_list_test.go | 7 +- server/routes_test.go | 89 +++-- 17 files changed, 1523 insertions(+), 1094 deletions(-) create mode 100644 parser/expandpath_test.go create mode 100644 server/create.go diff --git a/api/types.go b/api/types.go index 0ea0b9bf0..5100ed2b4 100644 --- a/api/types.go +++ b/api/types.go @@ -295,10 +295,21 @@ type EmbeddingResponse struct { // CreateRequest is the request passed to [Client.Create]. type CreateRequest struct { - Model string `json:"model"` + Model string `json:"model"` + Stream *bool `json:"stream,omitempty"` + Quantize string `json:"quantize,omitempty"` + + From string `json:"from,omitempty"` + Files map[string]string `json:"files,omitempty"` + Adapters map[string]string `json:"adapters,omitempty"` + Template string `json:"template,omitempty"` + License any `json:"license,omitempty"` + System string `json:"system,omitempty"` + Parameters map[string]any `json:"parameters,omitempty"` + Messages []Message `json:"messages,omitempty"` + + // Deprecated: set with the other request options Modelfile string `json:"modelfile"` - Stream *bool `json:"stream,omitempty"` - Quantize string `json:"quantize,omitempty"` // Deprecated: set the model name with Model instead Name string `json:"name"` diff --git a/cmd/cmd.go b/cmd/cmd.go index 2f77640c2..181f24d43 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -1,13 +1,10 @@ package cmd import ( - "archive/zip" "bufio" - "bytes" "context" "crypto/ed25519" "crypto/rand" - "crypto/sha256" "encoding/json" "encoding/pem" "errors" @@ -46,10 +43,7 @@ import ( "github.com/ollama/ollama/version" ) -var ( - errModelNotFound = errors.New("no Modelfile or safetensors files found") - errModelfileNotFound = errors.New("specified Modelfile wasn't found") -) +var errModelfileNotFound = errors.New("specified Modelfile wasn't found") func getModelfileName(cmd *cobra.Command) (string, error) { fn, _ := cmd.Flags().GetString("file") @@ -102,68 +96,52 @@ func CreateHandler(cmd *cobra.Command, args []string) error { return err } - home, err := os.UserHomeDir() + status := "gathering model components" + spinner := progress.NewSpinner(status) + p.Add(status, spinner) + + req, err := modelfile.CreateRequest() if err != nil { return err } + spinner.Stop() - status := "transferring model data" - spinner := progress.NewSpinner(status) - p.Add(status, spinner) - defer p.Stop() + req.Name = args[0] + quantize, _ := cmd.Flags().GetString("quantize") + if quantize != "" { + req.Quantize = quantize + } client, err := api.ClientFromEnvironment() if err != nil { return err } - for i := range modelfile.Commands { - switch modelfile.Commands[i].Name { - case "model", "adapter": - path := modelfile.Commands[i].Args - if path == "~" { - path = home - } else if strings.HasPrefix(path, "~/") { - path = filepath.Join(home, path[2:]) - } - - if !filepath.IsAbs(path) { - path = filepath.Join(filepath.Dir(filename), path) - } - - fi, err := os.Stat(path) - if errors.Is(err, os.ErrNotExist) && modelfile.Commands[i].Name == "model" { - continue - } else if err != nil { + if len(req.Files) > 0 { + fileMap := map[string]string{} + for f, digest := range req.Files { + if _, err := createBlob(cmd, client, f, digest, p); err != nil { return err } - - if fi.IsDir() { - // this is likely a safetensors or pytorch directory - // TODO make this work w/ adapters - tempfile, err := tempZipFiles(path) - if err != nil { - return err - } - defer os.RemoveAll(tempfile) - - path = tempfile - } - - digest, err := createBlob(cmd, client, path, spinner) - if err != nil { - return err - } - - modelfile.Commands[i].Args = "@" + digest + fileMap[filepath.Base(f)] = digest } + req.Files = fileMap + } + + if len(req.Adapters) > 0 { + fileMap := map[string]string{} + for f, digest := range req.Adapters { + if _, err := createBlob(cmd, client, f, digest, p); err != nil { + return err + } + fileMap[filepath.Base(f)] = digest + } + req.Adapters = fileMap } bars := make(map[string]*progress.Bar) fn := func(resp api.ProgressResponse) error { if resp.Digest != "" { - spinner.Stop() - bar, ok := bars[resp.Digest] if !ok { bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed) @@ -183,145 +161,20 @@ func CreateHandler(cmd *cobra.Command, args []string) error { return nil } - quantize, _ := cmd.Flags().GetString("quantize") - - request := api.CreateRequest{Name: args[0], Modelfile: modelfile.String(), Quantize: quantize} - if err := client.Create(cmd.Context(), &request, fn); err != nil { + if err := client.Create(cmd.Context(), req, fn); err != nil { return err } return nil } -func tempZipFiles(path string) (string, error) { - tempfile, err := os.CreateTemp("", "ollama-tf") +func createBlob(cmd *cobra.Command, client *api.Client, path string, digest string, p *progress.Progress) (string, error) { + realPath, err := filepath.EvalSymlinks(path) if err != nil { return "", err } - defer tempfile.Close() - detectContentType := func(path string) (string, error) { - f, err := os.Open(path) - if err != nil { - return "", err - } - defer f.Close() - - var b bytes.Buffer - b.Grow(512) - - if _, err := io.CopyN(&b, f, 512); err != nil && !errors.Is(err, io.EOF) { - return "", err - } - - contentType, _, _ := strings.Cut(http.DetectContentType(b.Bytes()), ";") - return contentType, nil - } - - glob := func(pattern, contentType string) ([]string, error) { - matches, err := filepath.Glob(pattern) - if err != nil { - return nil, err - } - - for _, safetensor := range matches { - if ct, err := detectContentType(safetensor); err != nil { - return nil, err - } else if ct != contentType { - return nil, fmt.Errorf("invalid content type: expected %s for %s", ct, safetensor) - } - } - - return matches, nil - } - - var files []string - if st, _ := glob(filepath.Join(path, "model*.safetensors"), "application/octet-stream"); len(st) > 0 { - // safetensors files might be unresolved git lfs references; skip if they are - // covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors - files = append(files, st...) - } else if st, _ := glob(filepath.Join(path, "adapters.safetensors"), "application/octet-stream"); len(st) > 0 { - // covers adapters.safetensors - files = append(files, st...) - } else if st, _ := glob(filepath.Join(path, "adapter_model.safetensors"), "application/octet-stream"); len(st) > 0 { - // covers adapter_model.safetensors - files = append(files, st...) - } else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 { - // pytorch files might also be unresolved git lfs references; skip if they are - // covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin - files = append(files, pt...) - } else if pt, _ := glob(filepath.Join(path, "consolidated*.pth"), "application/zip"); len(pt) > 0 { - // pytorch files might also be unresolved git lfs references; skip if they are - // covers consolidated.x.pth, consolidated.pth - files = append(files, pt...) - } else { - return "", errModelNotFound - } - - // add configuration files, json files are detected as text/plain - js, err := glob(filepath.Join(path, "*.json"), "text/plain") - if err != nil { - return "", err - } - files = append(files, js...) - - // bert models require a nested config.json - // TODO(mxyng): merge this with the glob above - js, err = glob(filepath.Join(path, "**/*.json"), "text/plain") - if err != nil { - return "", err - } - files = append(files, js...) - - if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 { - // add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob - // tokenizer.model might be a unresolved git lfs reference; error if it is - files = append(files, tks...) - } else if tks, _ := glob(filepath.Join(path, "**/tokenizer.model"), "text/plain"); len(tks) > 0 { - // some times tokenizer.model is in a subdirectory (e.g. meta-llama/Meta-Llama-3-8B) - files = append(files, tks...) - } - - zipfile := zip.NewWriter(tempfile) - defer zipfile.Close() - - for _, file := range files { - f, err := os.Open(file) - if err != nil { - return "", err - } - defer f.Close() - - fi, err := f.Stat() - if err != nil { - return "", err - } - - zfi, err := zip.FileInfoHeader(fi) - if err != nil { - return "", err - } - - zfi.Name, err = filepath.Rel(path, file) - if err != nil { - return "", err - } - - zf, err := zipfile.CreateHeader(zfi) - if err != nil { - return "", err - } - - if _, err := io.Copy(zf, f); err != nil { - return "", err - } - } - - return tempfile.Name(), nil -} - -func createBlob(cmd *cobra.Command, client *api.Client, path string, spinner *progress.Spinner) (string, error) { - bin, err := os.Open(path) + bin, err := os.Open(realPath) if err != nil { return "", err } @@ -334,18 +187,11 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string, spinner *pr } fileSize := fileInfo.Size() - hash := sha256.New() - if _, err := io.Copy(hash, bin); err != nil { - return "", err - } - - if _, err := bin.Seek(0, io.SeekStart); err != nil { - return "", err - } - var pw progressWriter - status := "transferring model data 0%" - spinner.SetMessage(status) + status := fmt.Sprintf("copying file %s 0%%", digest) + spinner := progress.NewSpinner(status) + p.Add(status, spinner) + defer spinner.Stop() done := make(chan struct{}) defer close(done) @@ -356,15 +202,14 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string, spinner *pr for { select { case <-ticker.C: - spinner.SetMessage(fmt.Sprintf("transferring model data %d%%", int(100*pw.n.Load()/fileSize))) + spinner.SetMessage(fmt.Sprintf("copying file %s %d%%", digest, int(100*pw.n.Load()/fileSize))) case <-done: - spinner.SetMessage("transferring model data 100%") + spinner.SetMessage(fmt.Sprintf("copying file %s 100%%", digest)) return } } }() - digest := fmt.Sprintf("sha256:%x", hash.Sum(nil)) if err = client.CreateBlob(cmd.Context(), digest, io.TeeReader(bin, &pw)); err != nil { return "", err } diff --git a/cmd/interactive.go b/cmd/interactive.go index 012fd1690..af58f4fc4 100644 --- a/cmd/interactive.go +++ b/cmd/interactive.go @@ -13,11 +13,9 @@ import ( "strings" "github.com/spf13/cobra" - "golang.org/x/exp/maps" "github.com/ollama/ollama/api" "github.com/ollama/ollama/envconfig" - "github.com/ollama/ollama/parser" "github.com/ollama/ollama/readline" "github.com/ollama/ollama/types/errtypes" ) @@ -213,10 +211,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error { return err } - req := &api.CreateRequest{ - Name: args[1], - Modelfile: buildModelfile(opts), - } + req := NewCreateRequest(args[1], opts) fn := func(resp api.ProgressResponse) error { return nil } err = client.Create(cmd.Context(), req, fn) if err != nil { @@ -459,39 +454,25 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error { } } -func buildModelfile(opts runOptions) string { - var f parser.File - f.Commands = append(f.Commands, parser.Command{Name: "model", Args: cmp.Or(opts.ParentModel, opts.Model)}) +func NewCreateRequest(name string, opts runOptions) *api.CreateRequest { + req := &api.CreateRequest{ + Name: name, + From: cmp.Or(opts.ParentModel, opts.Model), + } if opts.System != "" { - f.Commands = append(f.Commands, parser.Command{Name: "system", Args: opts.System}) + req.System = opts.System } - keys := maps.Keys(opts.Options) - slices.Sort(keys) - for _, k := range keys { - v := opts.Options[k] - var cmds []parser.Command - switch t := v.(type) { - case []string: - for _, s := range t { - cmds = append(cmds, parser.Command{Name: k, Args: s}) - } - default: - cmds = append(cmds, parser.Command{Name: k, Args: fmt.Sprintf("%v", t)}) - } - - f.Commands = append(f.Commands, cmds...) + if len(opts.Options) > 0 { + req.Parameters = opts.Options } - for _, msg := range opts.Messages { - if strings.Contains(msg.Content, "\"") { - msg.Content = `"""` + msg.Content + `"""` - } - f.Commands = append(f.Commands, parser.Command{Name: "message", Args: fmt.Sprintf("%s: %s", msg.Role, msg.Content)}) + if len(opts.Messages) > 0 { + req.Messages = opts.Messages } - return f.String() + return req } func normalizeFilePath(fp string) string { diff --git a/cmd/interactive_test.go b/cmd/interactive_test.go index 118f42640..3f60448b6 100644 --- a/cmd/interactive_test.go +++ b/cmd/interactive_test.go @@ -3,10 +3,7 @@ package cmd import ( "testing" - "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/assert" - - "github.com/ollama/ollama/api" ) func TestExtractFilenames(t *testing.T) { @@ -53,56 +50,3 @@ d:\path with\spaces\seven.JPEG inbetween7 c:\users\jdoe\eight.png inbetween8 assert.Contains(t, res[9], "ten.PNG") assert.Contains(t, res[9], "E:") } - -func TestModelfileBuilder(t *testing.T) { - opts := runOptions{ - Model: "hork", - System: "You are part horse and part shark, but all hork. Do horklike things", - Messages: []api.Message{ - {Role: "user", Content: "Hey there hork!"}, - {Role: "assistant", Content: "Yes it is true, I am half horse, half shark."}, - }, - Options: map[string]any{ - "temperature": 0.9, - "seed": 42, - "penalize_newline": false, - "stop": []string{"hi", "there"}, - }, - } - - t.Run("model", func(t *testing.T) { - expect := `FROM hork -SYSTEM You are part horse and part shark, but all hork. Do horklike things -PARAMETER penalize_newline false -PARAMETER seed 42 -PARAMETER stop hi -PARAMETER stop there -PARAMETER temperature 0.9 -MESSAGE user Hey there hork! -MESSAGE assistant Yes it is true, I am half horse, half shark. -` - - actual := buildModelfile(opts) - if diff := cmp.Diff(expect, actual); diff != "" { - t.Errorf("mismatch (-want +got):\n%s", diff) - } - }) - - t.Run("parent model", func(t *testing.T) { - opts.ParentModel = "horseshark" - expect := `FROM horseshark -SYSTEM You are part horse and part shark, but all hork. Do horklike things -PARAMETER penalize_newline false -PARAMETER seed 42 -PARAMETER stop hi -PARAMETER stop there -PARAMETER temperature 0.9 -MESSAGE user Hey there hork! -MESSAGE assistant Yes it is true, I am half horse, half shark. -` - actual := buildModelfile(opts) - if diff := cmp.Diff(expect, actual); diff != "" { - t.Errorf("mismatch (-want +got):\n%s", diff) - } - }) -} diff --git a/parser/expandpath_test.go b/parser/expandpath_test.go new file mode 100644 index 000000000..c51e01cbe --- /dev/null +++ b/parser/expandpath_test.go @@ -0,0 +1,57 @@ +package parser + +import ( + "os" + "os/user" + "path/filepath" + "testing" +) + +func TestExpandPath(t *testing.T) { + mockCurrentUser := func() (*user.User, error) { + return &user.User{ + Username: "testuser", + HomeDir: "/home/testuser", + }, nil + } + + mockLookupUser := func(username string) (*user.User, error) { + fakeUsers := map[string]string{ + "testuser": "/home/testuser", + "anotheruser": "/home/anotheruser", + } + + if homeDir, ok := fakeUsers[username]; ok { + return &user.User{ + Username: username, + HomeDir: homeDir, + }, nil + } + return nil, os.ErrNotExist + } + + tests := []struct { + input string + expected string + windowsExpected string + shouldErr bool + }{ + {"~", "/home/testuser", "D:\\home\\testuser", false}, + {"~/myfolder/myfile.txt", "/home/testuser/myfolder/myfile.txt", "D:\\home\\testuser\\myfolder\\myfile.txt", false}, + {"~anotheruser/docs/file.txt", "/home/anotheruser/docs/file.txt", "D:\\home\\anotheruser\\docs\\file.txt", false}, + {"~nonexistentuser/file.txt", "", "", true}, + {"relative/path/to/file", filepath.Join(os.Getenv("PWD"), "relative/path/to/file"), "relative\\path\\to\\file", false}, + {"/absolute/path/to/file", "/absolute/path/to/file", "D:\\absolute\\path\\to\\file", false}, + {".", os.Getenv("PWD"), os.Getenv("PWD"), false}, + } + + for _, test := range tests { + result, err := expandPathImpl(test.input, mockCurrentUser, mockLookupUser) + if (err != nil) != test.shouldErr { + t.Errorf("expandPathImpl(%q) returned error: %v, expected error: %v", test.input, err != nil, test.shouldErr) + } + if result != test.expected && result != test.windowsExpected && !test.shouldErr { + t.Errorf("expandPathImpl(%q) = %q, want %q", test.input, result, test.expected) + } + } +} diff --git a/parser/parser.go b/parser/parser.go index cc78d1aa4..e38b640a7 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -3,21 +3,30 @@ package parser import ( "bufio" "bytes" + "crypto/sha256" "errors" "fmt" "io" + "net/http" + "os" + "os/user" + "path/filepath" "strconv" "strings" "golang.org/x/text/encoding/unicode" "golang.org/x/text/transform" + + "github.com/ollama/ollama/api" ) -type File struct { +var ErrModelNotFound = errors.New("no Modelfile or safetensors files found") + +type Modelfile struct { Commands []Command } -func (f File) String() string { +func (f Modelfile) String() string { var sb strings.Builder for _, cmd := range f.Commands { fmt.Fprintln(&sb, cmd.String()) @@ -26,6 +35,223 @@ func (f File) String() string { return sb.String() } +// CreateRequest creates a new *api.CreateRequest from an existing Modelfile +func (f Modelfile) CreateRequest() (*api.CreateRequest, error) { + req := &api.CreateRequest{} + + var messages []api.Message + var licenses []string + params := make(map[string]any) + + for _, c := range f.Commands { + switch c.Name { + case "model": + path, err := expandPath(c.Args) + if err != nil { + return nil, err + } + + digestMap, err := fileDigestMap(path) + if errors.Is(err, os.ErrNotExist) { + req.From = c.Args + continue + } else if err != nil { + return nil, err + } + + req.Files = digestMap + case "adapter": + path, err := expandPath(c.Args) + if err != nil { + return nil, err + } + + digestMap, err := fileDigestMap(path) + if err != nil { + return nil, err + } + + req.Adapters = digestMap + case "template": + req.Template = c.Args + case "system": + req.System = c.Args + case "license": + licenses = append(licenses, c.Args) + case "message": + role, msg, _ := strings.Cut(c.Args, ": ") + messages = append(messages, api.Message{Role: role, Content: msg}) + default: + ps, err := api.FormatParams(map[string][]string{c.Name: {c.Args}}) + if err != nil { + return nil, err + } + + for k, v := range ps { + if ks, ok := params[k].([]string); ok { + params[k] = append(ks, v.([]string)...) + } else if vs, ok := v.([]string); ok { + params[k] = vs + } else { + params[k] = v + } + } + } + } + + if len(params) > 0 { + req.Parameters = params + } + if len(messages) > 0 { + req.Messages = messages + } + if len(licenses) > 0 { + req.License = licenses + } + + return req, nil +} + +func fileDigestMap(path string) (map[string]string, error) { + fl := make(map[string]string) + + fi, err := os.Stat(path) + if err != nil { + return nil, err + } + + var files []string + if fi.IsDir() { + files, err = filesForModel(path) + if err != nil { + return nil, err + } + } else { + files = []string{path} + } + + for _, f := range files { + digest, err := digestForFile(f) + if err != nil { + return nil, err + } + fl[f] = digest + } + + return fl, nil +} + +func digestForFile(filename string) (string, error) { + filepath, err := filepath.EvalSymlinks(filename) + if err != nil { + return "", err + } + + bin, err := os.Open(filepath) + if err != nil { + return "", err + } + defer bin.Close() + + hash := sha256.New() + if _, err := io.Copy(hash, bin); err != nil { + return "", err + } + return fmt.Sprintf("sha256:%x", hash.Sum(nil)), nil +} + +func filesForModel(path string) ([]string, error) { + detectContentType := func(path string) (string, error) { + f, err := os.Open(path) + if err != nil { + return "", err + } + defer f.Close() + + var b bytes.Buffer + b.Grow(512) + + if _, err := io.CopyN(&b, f, 512); err != nil && !errors.Is(err, io.EOF) { + return "", err + } + + contentType, _, _ := strings.Cut(http.DetectContentType(b.Bytes()), ";") + return contentType, nil + } + + glob := func(pattern, contentType string) ([]string, error) { + matches, err := filepath.Glob(pattern) + if err != nil { + return nil, err + } + + for _, safetensor := range matches { + if ct, err := detectContentType(safetensor); err != nil { + return nil, err + } else if ct != contentType { + return nil, fmt.Errorf("invalid content type: expected %s for %s", ct, safetensor) + } + } + + return matches, nil + } + + var files []string + if st, _ := glob(filepath.Join(path, "model*.safetensors"), "application/octet-stream"); len(st) > 0 { + // safetensors files might be unresolved git lfs references; skip if they are + // covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors + files = append(files, st...) + } else if st, _ := glob(filepath.Join(path, "adapters.safetensors"), "application/octet-stream"); len(st) > 0 { + // covers adapters.safetensors + files = append(files, st...) + } else if st, _ := glob(filepath.Join(path, "adapter_model.safetensors"), "application/octet-stream"); len(st) > 0 { + // covers adapter_model.safetensors + files = append(files, st...) + } else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 { + // pytorch files might also be unresolved git lfs references; skip if they are + // covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin + files = append(files, pt...) + } else if pt, _ := glob(filepath.Join(path, "consolidated*.pth"), "application/zip"); len(pt) > 0 { + // pytorch files might also be unresolved git lfs references; skip if they are + // covers consolidated.x.pth, consolidated.pth + files = append(files, pt...) + } else if gg, _ := glob(filepath.Join(path, "*.gguf"), "application/octet-stream"); len(gg) > 0 { + // covers gguf files ending in .gguf + files = append(files, gg...) + } else if gg, _ := glob(filepath.Join(path, "*.bin"), "application/octet-stream"); len(gg) > 0 { + // covers gguf files ending in .bin + files = append(files, gg...) + } else { + return nil, ErrModelNotFound + } + + // add configuration files, json files are detected as text/plain + js, err := glob(filepath.Join(path, "*.json"), "text/plain") + if err != nil { + return nil, err + } + files = append(files, js...) + + // bert models require a nested config.json + // TODO(mxyng): merge this with the glob above + js, err = glob(filepath.Join(path, "**/*.json"), "text/plain") + if err != nil { + return nil, err + } + files = append(files, js...) + + if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 { + // add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob + // tokenizer.model might be a unresolved git lfs reference; error if it is + files = append(files, tks...) + } else if tks, _ := glob(filepath.Join(path, "**/tokenizer.model"), "text/plain"); len(tks) > 0 { + // some times tokenizer.model is in a subdirectory (e.g. meta-llama/Meta-Llama-3-8B) + files = append(files, tks...) + } + + return files, nil +} + type Command struct { Name string Args string @@ -77,14 +303,14 @@ func (e *ParserError) Error() string { return e.Msg } -func ParseFile(r io.Reader) (*File, error) { +func ParseFile(r io.Reader) (*Modelfile, error) { var cmd Command var curr state var currLine int = 1 var b bytes.Buffer var role string - var f File + var f Modelfile tr := unicode.BOMOverride(unicode.UTF8.NewDecoder()) br := bufio.NewReader(transform.NewReader(r, tr)) @@ -328,3 +554,40 @@ func isValidCommand(cmd string) bool { return false } } + +func expandPathImpl(path string, currentUserFunc func() (*user.User, error), lookupUserFunc func(string) (*user.User, error)) (string, error) { + if strings.HasPrefix(path, "~") { + var homeDir string + + if path == "~" || strings.HasPrefix(path, "~/") { + // Current user's home directory + currentUser, err := currentUserFunc() + if err != nil { + return "", fmt.Errorf("failed to get current user: %w", err) + } + homeDir = currentUser.HomeDir + path = strings.TrimPrefix(path, "~") + } else { + // Specific user's home directory + parts := strings.SplitN(path[1:], "/", 2) + userInfo, err := lookupUserFunc(parts[0]) + if err != nil { + return "", fmt.Errorf("failed to find user '%s': %w", parts[0], err) + } + homeDir = userInfo.HomeDir + if len(parts) > 1 { + path = "/" + parts[1] + } else { + path = "" + } + } + + path = filepath.Join(homeDir, path) + } + + return filepath.Abs(path) +} + +func expandPath(path string) (string, error) { + return expandPathImpl(path, user.Current, user.Lookup) +} diff --git a/parser/parser_test.go b/parser/parser_test.go index b5614c2ed..169cf10fd 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -2,18 +2,24 @@ package parser import ( "bytes" + "crypto/sha256" "encoding/binary" "errors" "fmt" "io" + "os" "strings" "testing" "unicode/utf16" + "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "golang.org/x/text/encoding" "golang.org/x/text/encoding/unicode" + + "github.com/ollama/ollama/api" + "github.com/ollama/ollama/llm" ) func TestParseFileFile(t *testing.T) { @@ -673,3 +679,150 @@ func TestParseMultiByte(t *testing.T) { }) } } + +func TestCreateRequest(t *testing.T) { + cases := []struct { + input string + expected *api.CreateRequest + }{ + { + `FROM test`, + &api.CreateRequest{From: "test"}, + }, + { + `FROM test +TEMPLATE some template +`, + &api.CreateRequest{ + From: "test", + Template: "some template", + }, + }, + { + `FROM test +LICENSE single license +PARAMETER temperature 0.5 +MESSAGE user Hello +`, + &api.CreateRequest{ + From: "test", + License: []string{"single license"}, + Parameters: map[string]any{"temperature": float32(0.5)}, + Messages: []api.Message{ + {Role: "user", Content: "Hello"}, + }, + }, + }, + { + `FROM test +PARAMETER temperature 0.5 +PARAMETER top_k 1 +SYSTEM You are a bot. +LICENSE license1 +LICENSE license2 +MESSAGE user Hello there! +MESSAGE assistant Hi! How are you? +`, + &api.CreateRequest{ + From: "test", + License: []string{"license1", "license2"}, + System: "You are a bot.", + Parameters: map[string]any{"temperature": float32(0.5), "top_k": int64(1)}, + Messages: []api.Message{ + {Role: "user", Content: "Hello there!"}, + {Role: "assistant", Content: "Hi! How are you?"}, + }, + }, + }, + } + + for _, c := range cases { + s, err := unicode.UTF8.NewEncoder().String(c.input) + if err != nil { + t.Fatal(err) + } + + p, err := ParseFile(strings.NewReader(s)) + if err != nil { + t.Error(err) + } + + actual, err := p.CreateRequest() + if err != nil { + t.Error(err) + } + + if diff := cmp.Diff(actual, c.expected); diff != "" { + t.Errorf("mismatch (-got +want):\n%s", diff) + } + } +} + +func getSHA256Digest(t *testing.T, r io.Reader) (string, int64) { + t.Helper() + + h := sha256.New() + n, err := io.Copy(h, r) + if err != nil { + t.Fatal(err) + } + + return fmt.Sprintf("sha256:%x", h.Sum(nil)), n +} + +func createBinFile(t *testing.T, kv map[string]any, ti []llm.Tensor) (string, string) { + t.Helper() + + f, err := os.CreateTemp(t.TempDir(), "testbin.*.gguf") + if err != nil { + t.Fatal(err) + } + defer f.Close() + + if err := llm.WriteGGUF(f, kv, ti); err != nil { + t.Fatal(err) + } + // Calculate sha256 of file + if _, err := f.Seek(0, 0); err != nil { + t.Fatal(err) + } + + digest, _ := getSHA256Digest(t, f) + + return f.Name(), digest +} + +func TestCreateRequestFiles(t *testing.T) { + name, digest := createBinFile(t, nil, nil) + + cases := []struct { + input string + expected *api.CreateRequest + }{ + { + fmt.Sprintf("FROM %s", name), + &api.CreateRequest{Files: map[string]string{name: digest}}, + }, + } + + for _, c := range cases { + s, err := unicode.UTF8.NewEncoder().String(c.input) + if err != nil { + t.Fatal(err) + } + + p, err := ParseFile(strings.NewReader(s)) + if err != nil { + t.Error(err) + } + + actual, err := p.CreateRequest() + if err != nil { + t.Error(err) + } + + if diff := cmp.Diff(actual, c.expected); diff != "" { + t.Errorf("mismatch (-got +want):\n%s", diff) + } + } +} diff --git a/server/create.go b/server/create.go new file mode 100644 index 000000000..f86c79a5e --- /dev/null +++ b/server/create.go @@ -0,0 +1,667 @@ +package server + +import ( + "bytes" + "cmp" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "log/slog" + "net/http" + "os" + "path/filepath" + "slices" + "strings" + + "github.com/gin-gonic/gin" + + "github.com/ollama/ollama/api" + "github.com/ollama/ollama/convert" + "github.com/ollama/ollama/envconfig" + "github.com/ollama/ollama/format" + "github.com/ollama/ollama/llama" + "github.com/ollama/ollama/llm" + "github.com/ollama/ollama/template" + "github.com/ollama/ollama/types/errtypes" + "github.com/ollama/ollama/types/model" +) + +var ( + errNoFilesProvided = errors.New("no files provided to convert") + errOnlyOneAdapterSupported = errors.New("only one adapter is currently supported") + errOnlyGGUFSupported = errors.New("supplied file was not in GGUF format") + errUnknownType = errors.New("unknown type") +) + +func (s *Server) CreateHandler(c *gin.Context) { + var r api.CreateRequest + if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) { + c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"}) + return + } else if err != nil { + c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + + name := model.ParseName(cmp.Or(r.Model, r.Name)) + if !name.IsValid() { + c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": errtypes.InvalidModelNameErrMsg}) + return + } + + name, err := getExistingName(name) + if err != nil { + c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + + ch := make(chan any) + go func() { + defer close(ch) + fn := func(resp api.ProgressResponse) { + ch <- resp + } + + oldManifest, _ := ParseNamedManifest(name) + + var baseLayers []*layerGGML + if r.From != "" { + slog.Debug("create model from model name") + fromName := model.ParseName(r.From) + if !fromName.IsValid() { + ch <- gin.H{"error": errtypes.InvalidModelNameErrMsg, "status": http.StatusBadRequest} + return + } + + ctx, cancel := context.WithCancel(c.Request.Context()) + defer cancel() + + baseLayers, err = parseFromModel(ctx, fromName, fn) + if err != nil { + ch <- gin.H{"error": err.Error()} + } + } else if r.Files != nil { + baseLayers, err = convertModelFromFiles(r.Files, baseLayers, false, fn) + if err != nil { + for _, badReq := range []error{errNoFilesProvided, errOnlyGGUFSupported, errUnknownType} { + if errors.Is(err, badReq) { + ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest} + return + } + } + ch <- gin.H{"error": err.Error()} + return + } + } else { + ch <- gin.H{"error": "neither 'from' or 'files' was specified", "status": http.StatusBadRequest} + return + } + + var adapterLayers []*layerGGML + if r.Adapters != nil { + adapterLayers, err = convertModelFromFiles(r.Adapters, baseLayers, true, fn) + if err != nil { + for _, badReq := range []error{errNoFilesProvided, errOnlyOneAdapterSupported, errOnlyGGUFSupported, errUnknownType} { + if errors.Is(err, badReq) { + ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest} + return + } + } + ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest} + return + } + } + + if len(adapterLayers) > 0 { + baseLayers = append(baseLayers, adapterLayers...) + } + + if err := createModel(r, name, baseLayers, fn); err != nil { + if errors.Is(err, errBadTemplate) { + ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest} + return + } + ch <- gin.H{"error": err.Error()} + return + } + + if !envconfig.NoPrune() && oldManifest != nil { + if err := oldManifest.RemoveLayers(); err != nil { + ch <- gin.H{"error": err.Error()} + } + } + + ch <- api.ProgressResponse{Status: "success"} + }() + + if r.Stream != nil && !*r.Stream { + waitForStream(c, ch) + return + } + + streamResponse(c, ch) +} + +func convertModelFromFiles(files map[string]string, baseLayers []*layerGGML, isAdapter bool, fn func(resp api.ProgressResponse)) ([]*layerGGML, error) { + switch detectModelTypeFromFiles(files) { + case "safetensors": + layers, err := convertFromSafetensors(files, baseLayers, isAdapter, fn) + if err != nil { + slog.Error("error converting from safetensors", "error", err) + return nil, err + } + return layers, nil + case "gguf": + if len(files) == 0 { + return nil, errNoFilesProvided + } else if len(files) > 1 && isAdapter { + return nil, errOnlyOneAdapterSupported + } + + var digest string + var allLayers []*layerGGML + for _, v := range files { + digest = v + layers, err := ggufLayers(digest, fn) + if err != nil { + return nil, err + } + allLayers = append(allLayers, layers...) + } + return allLayers, nil + default: + return nil, errUnknownType + } +} + +func detectModelTypeFromFiles(files map[string]string) string { + // todo make this more robust by actually introspecting the files + for fn := range files { + if strings.HasSuffix(fn, ".safetensors") { + return "safetensors" + } else if strings.HasSuffix(fn, ".bin") || strings.HasSuffix(fn, ".gguf") { + return "gguf" + } + } + + return "" +} + +func convertFromSafetensors(files map[string]string, baseLayers []*layerGGML, isAdapter bool, fn func(resp api.ProgressResponse)) ([]*layerGGML, error) { + tmpDir, err := os.MkdirTemp("", "ollama-safetensors") + if err != nil { + return nil, err + } + defer os.RemoveAll(tmpDir) + + for fp, digest := range files { + blobPath, err := GetBlobsPath(digest) + if err != nil { + return nil, err + } + if err := createLink(blobPath, filepath.Join(tmpDir, fp)); err != nil { + return nil, err + } + } + + t, err := os.CreateTemp(tmpDir, "fp16") + if err != nil { + return nil, err + } + defer t.Close() + + var mediaType string + if !isAdapter { + fn(api.ProgressResponse{Status: "converting model"}) + mediaType = "application/vnd.ollama.image.model" + if err := convert.ConvertModel(os.DirFS(tmpDir), t); err != nil { + return nil, err + } + } else { + kv, err := kvFromLayers(baseLayers) + if err != nil { + return nil, err + } + fn(api.ProgressResponse{Status: "converting adapter"}) + mediaType = "application/vnd.ollama.image.adapter" + if err := convert.ConvertAdapter(os.DirFS(tmpDir), t, kv); err != nil { + return nil, err + } + } + + if _, err := t.Seek(0, io.SeekStart); err != nil { + return nil, err + } + + layer, err := NewLayer(t, mediaType) + if err != nil { + return nil, err + } + + bin, err := layer.Open() + if err != nil { + return nil, err + } + + ggml, _, err := llm.DecodeGGML(bin, 0) + if err != nil { + return nil, err + } + layers := []*layerGGML{{layer, ggml}} + + if !isAdapter { + return detectChatTemplate(layers) + } + return layers, nil +} + +func kvFromLayers(baseLayers []*layerGGML) (llm.KV, error) { + for _, l := range baseLayers { + if l.GGML != nil { + return l.KV(), nil + } + } + return llm.KV{}, fmt.Errorf("no base model was found") +} + +func createModel(r api.CreateRequest, name model.Name, baseLayers []*layerGGML, fn func(resp api.ProgressResponse)) (err error) { + config := ConfigV2{ + OS: "linux", + Architecture: "amd64", + RootFS: RootFS{ + Type: "layers", + }, + } + + var layers []Layer + for _, layer := range baseLayers { + if layer.GGML != nil { + quantType := strings.ToUpper(cmp.Or(r.Quantize, r.Quantization)) + if quantType != "" && layer.GGML.Name() == "gguf" && layer.MediaType == "application/vnd.ollama.image.model" { + want, err := llm.ParseFileType(quantType) + if err != nil { + return err + } + + ft := layer.GGML.KV().FileType() + if !slices.Contains([]string{"F16", "F32"}, ft.String()) { + return errors.New("quantization is only supported for F16 and F32 models") + } else if ft != want { + layer, err = quantizeLayer(layer, quantType, fn) + if err != nil { + return err + } + } + } + config.ModelFormat = cmp.Or(config.ModelFormat, layer.GGML.Name()) + config.ModelFamily = cmp.Or(config.ModelFamily, layer.GGML.KV().Architecture()) + config.ModelType = cmp.Or(config.ModelType, format.HumanNumber(layer.GGML.KV().ParameterCount())) + config.FileType = cmp.Or(config.FileType, layer.GGML.KV().FileType().String()) + config.ModelFamilies = append(config.ModelFamilies, layer.GGML.KV().Architecture()) + } + layers = append(layers, layer.Layer) + } + + if r.Template != "" { + layers, err = setTemplate(layers, r.Template) + if err != nil { + return err + } + } + + if r.System != "" { + layers, err = setSystem(layers, r.System) + if err != nil { + return err + } + } + + if r.License != nil { + switch l := r.License.(type) { + case string: + if l != "" { + layers, err = setLicense(layers, l) + if err != nil { + return err + } + } + case any: + var licenses []string + b, _ := json.Marshal(l) // re-marshal to JSON + if err := json.Unmarshal(b, &licenses); err != nil { + return err + } + for _, v := range licenses { + layers, err = setLicense(layers, v) + if err != nil { + return err + } + } + default: + return fmt.Errorf("unknown license type: %T", l) + } + } + + layers, err = setParameters(layers, r.Parameters) + if err != nil { + return err + } + + layers, err = setMessages(layers, r.Messages) + if err != nil { + return err + } + + configLayer, err := createConfigLayer(layers, config) + if err != nil { + return err + } + + for _, layer := range layers { + if layer.status != "" { + fn(api.ProgressResponse{Status: layer.status}) + } + } + + fn(api.ProgressResponse{Status: "writing manifest"}) + if err := WriteManifest(name, *configLayer, layers); err != nil { + return err + } + + return nil +} + +func quantizeLayer(layer *layerGGML, quantizeType string, fn func(resp api.ProgressResponse)) (*layerGGML, error) { + ft := layer.GGML.KV().FileType() + fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", ft, quantizeType)}) + + want, err := llm.ParseFileType(quantizeType) + if err != nil { + return nil, err + } + + blob, err := GetBlobsPath(layer.Digest) + if err != nil { + return nil, err + } + + temp, err := os.CreateTemp(filepath.Dir(blob), quantizeType) + if err != nil { + return nil, err + } + defer temp.Close() + defer os.Remove(temp.Name()) + + if err := llama.Quantize(blob, temp.Name(), uint32(want)); err != nil { + return nil, err + } + + newLayer, err := NewLayer(temp, layer.MediaType) + if err != nil { + return nil, err + } + + if _, err := temp.Seek(0, io.SeekStart); err != nil { + return nil, err + } + + ggml, _, err := llm.DecodeGGML(temp, 0) + if err != nil { + slog.Error(fmt.Sprintf("error decoding ggml: %s\n", err)) + return nil, err + } + + return &layerGGML{newLayer, ggml}, nil +} + +func ggufLayers(digest string, fn func(resp api.ProgressResponse)) ([]*layerGGML, error) { + var layers []*layerGGML + + fn(api.ProgressResponse{Status: "parsing GGUF"}) + blobPath, err := GetBlobsPath(digest) + if err != nil { + return nil, err + } + + blob, err := os.Open(blobPath) + if err != nil { + return nil, err + } + defer blob.Close() + + sr := io.NewSectionReader(blob, 0, 512) + contentType, err := detectContentType(sr) + if err != nil { + return nil, err + } + + if contentType != "gguf" { + slog.Error(fmt.Sprintf("unsupported content type: %s", contentType)) + return nil, errOnlyGGUFSupported + } + + stat, err := blob.Stat() + if err != nil { + return nil, err + } + + var offset int64 + for offset < stat.Size() { + ggml, n, err := llm.DecodeGGML(blob, 0) + if errors.Is(err, io.EOF) { + break + } else if err != nil { + return nil, err + } + + mediatype := "application/vnd.ollama.image.model" + if ggml.KV().Kind() == "adapter" { + mediatype = "application/vnd.ollama.image.adapter" + } else if _, ok := ggml.KV()[fmt.Sprintf("%s.vision.block_count", ggml.KV().Architecture())]; ok || ggml.KV().Kind() == "projector" { + mediatype = "application/vnd.ollama.image.projector" + } + + var layer Layer + if digest != "" && n == stat.Size() && offset == 0 { + layer, err = NewLayerFromLayer(digest, mediatype, blob.Name()) + if err != nil { + slog.Debug("could not create new layer from layer", "error", err) + return nil, err + } + } + + // Fallback to creating layer from file copy (either NewLayerFromLayer failed, or digest empty/n != stat.Size()) + if layer.Digest == "" { + layer, err = NewLayer(io.NewSectionReader(blob, offset, n), mediatype) + if err != nil { + return nil, err + } + } + + layers = append(layers, &layerGGML{layer, ggml}) + offset = n + } + + return detectChatTemplate(layers) +} + +func removeLayer(layers []Layer, mediatype string) []Layer { + return slices.DeleteFunc(layers, func(layer Layer) bool { + if layer.MediaType != mediatype { + return false + } + + if err := layer.Remove(); err != nil { + slog.Warn("couldn't remove blob", "digest", layer.Digest, "error", err) + return true + } + + return true + }) +} + +func setTemplate(layers []Layer, t string) ([]Layer, error) { + layers = removeLayer(layers, "application/vnd.ollama.image.template") + if _, err := template.Parse(t); err != nil { + return nil, fmt.Errorf("%w: %s", errBadTemplate, err) + } + if _, err := template.Parse(t); err != nil { + return nil, fmt.Errorf("%w: %s", errBadTemplate, err) + } + + blob := strings.NewReader(t) + layer, err := NewLayer(blob, "application/vnd.ollama.image.template") + if err != nil { + return nil, err + } + + layers = append(layers, layer) + return layers, nil +} + +func setSystem(layers []Layer, s string) ([]Layer, error) { + layers = removeLayer(layers, "application/vnd.ollama.image.system") + if s != "" { + blob := strings.NewReader(s) + layer, err := NewLayer(blob, "application/vnd.ollama.image.system") + if err != nil { + return nil, err + } + layers = append(layers, layer) + } + return layers, nil +} + +func setLicense(layers []Layer, l string) ([]Layer, error) { + blob := strings.NewReader(l) + layer, err := NewLayer(blob, "application/vnd.ollama.image.license") + if err != nil { + return nil, err + } + layers = append(layers, layer) + return layers, nil +} + +func setParameters(layers []Layer, p map[string]any) ([]Layer, error) { + if p == nil { + p = make(map[string]any) + } + for _, layer := range layers { + if layer.MediaType != "application/vnd.ollama.image.params" { + continue + } + + digestPath, err := GetBlobsPath(layer.Digest) + if err != nil { + return nil, err + } + + fn, err := os.Open(digestPath) + if err != nil { + return nil, err + } + defer fn.Close() + + var existing map[string]any + if err := json.NewDecoder(fn).Decode(&existing); err != nil { + return nil, err + } + + for k, v := range existing { + if _, exists := p[k]; exists { + continue + } + p[k] = v + } + } + + if len(p) == 0 { + return layers, nil + } + + layers = removeLayer(layers, "application/vnd.ollama.image.params") + + var b bytes.Buffer + if err := json.NewEncoder(&b).Encode(p); err != nil { + return nil, err + } + layer, err := NewLayer(&b, "application/vnd.ollama.image.params") + if err != nil { + return nil, err + } + layers = append(layers, layer) + return layers, nil +} + +func setMessages(layers []Layer, m []api.Message) ([]Layer, error) { + // this leaves the old messages intact if no new messages were specified + // which may not be the correct behaviour + if len(m) == 0 { + return layers, nil + } + + fmt.Printf("removing old messages\n") + layers = removeLayer(layers, "application/vnd.ollama.image.messages") + var b bytes.Buffer + if err := json.NewEncoder(&b).Encode(m); err != nil { + return nil, err + } + layer, err := NewLayer(&b, "application/vnd.ollama.image.messages") + if err != nil { + return nil, err + } + layers = append(layers, layer) + return layers, nil +} + +func createConfigLayer(layers []Layer, config ConfigV2) (*Layer, error) { + digests := make([]string, len(layers)) + for i, layer := range layers { + digests[i] = layer.Digest + } + config.RootFS.DiffIDs = digests + + var b bytes.Buffer + if err := json.NewEncoder(&b).Encode(config); err != nil { + return nil, err + } + layer, err := NewLayer(&b, "application/vnd.docker.container.image.v1+json") + if err != nil { + return nil, err + } + return &layer, nil +} + +func createLink(src, dst string) error { + // make any subdirs for dst + if err := os.MkdirAll(filepath.Dir(dst), 0o755); err != nil { + return err + } + + _ = os.Remove(dst) + if err := os.Symlink(src, dst); err != nil { + if err := copyFile(src, dst); err != nil { + return err + } + } + return nil +} + +func copyFile(src, dst string) error { + srcFile, err := os.Open(src) + if err != nil { + return err + } + defer srcFile.Close() + + dstFile, err := os.Create(dst) + if err != nil { + return err + } + defer dstFile.Close() + + _, err = io.Copy(dstFile, srcFile) + return err +} diff --git a/server/images.go b/server/images.go index 4006584fa..6a9dc0f05 100644 --- a/server/images.go +++ b/server/images.go @@ -2,7 +2,6 @@ package server import ( "bytes" - "cmp" "context" "crypto/sha256" "encoding/hex" @@ -24,8 +23,6 @@ import ( "github.com/ollama/ollama/api" "github.com/ollama/ollama/envconfig" - "github.com/ollama/ollama/format" - "github.com/ollama/ollama/llama" "github.com/ollama/ollama/llm" "github.com/ollama/ollama/parser" "github.com/ollama/ollama/template" @@ -121,7 +118,7 @@ func (m *Model) CheckCapabilities(caps ...Capability) error { } func (m *Model) String() string { - var modelfile parser.File + var modelfile parser.Modelfile modelfile.Commands = append(modelfile.Commands, parser.Command{ Name: "model", @@ -330,328 +327,6 @@ func GetModel(name string) (*Model, error) { return model, nil } -func realpath(rel, from string) string { - abspath, err := filepath.Abs(from) - if err != nil { - return from - } - - home, err := os.UserHomeDir() - if err != nil { - return abspath - } - - if from == "~" { - return home - } else if strings.HasPrefix(from, "~/") { - return filepath.Join(home, from[2:]) - } - - if _, err := os.Stat(filepath.Join(rel, from)); err == nil { - // this is a file relative to the Modelfile - return filepath.Join(rel, from) - } - - return abspath -} - -func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantization string, modelfile *parser.File, fn func(resp api.ProgressResponse)) (err error) { - config := ConfigV2{ - OS: "linux", - Architecture: "amd64", - RootFS: RootFS{ - Type: "layers", - }, - } - - var messages []*api.Message - parameters := make(map[string]any) - - var layers []Layer - var baseLayers []*layerGGML - for _, c := range modelfile.Commands { - mediatype := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name) - command := c.Name - - switch command { - case "model", "adapter": - if name := model.ParseName(c.Args); name.IsValid() && command == "model" { - name, err := getExistingName(name) - if err != nil { - return err - } - baseLayers, err = parseFromModel(ctx, name, fn) - if err != nil { - return err - } - } else if strings.HasPrefix(c.Args, "@") { - digest := strings.TrimPrefix(c.Args, "@") - if ib, ok := intermediateBlobs[digest]; ok { - p, err := GetBlobsPath(ib) - if err != nil { - return err - } - - if _, err := os.Stat(p); errors.Is(err, os.ErrNotExist) { - // pass - } else if err != nil { - return err - } else { - fn(api.ProgressResponse{Status: fmt.Sprintf("using cached layer %s", ib)}) - digest = ib - } - } - - blobpath, err := GetBlobsPath(digest) - if err != nil { - return err - } - - blob, err := os.Open(blobpath) - if err != nil { - return err - } - defer blob.Close() - - baseLayers, err = parseFromFile(ctx, command, baseLayers, blob, digest, fn) - if err != nil { - return err - } - } else if file, err := os.Open(realpath(modelFileDir, c.Args)); err == nil { - defer file.Close() - - baseLayers, err = parseFromFile(ctx, command, baseLayers, file, "", fn) - if err != nil { - return err - } - } else { - return fmt.Errorf("invalid model reference: %s", c.Args) - } - - for _, baseLayer := range baseLayers { - if quantization != "" && - baseLayer.MediaType == "application/vnd.ollama.image.model" && - baseLayer.GGML != nil && - baseLayer.GGML.Name() == "gguf" { - want, err := llm.ParseFileType(quantization) - if err != nil { - return err - } - - ft := baseLayer.GGML.KV().FileType() - if !slices.Contains([]string{"F16", "F32"}, ft.String()) { - return errors.New("quantization is only supported for F16 and F32 models") - } else if want != ft { - fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", ft, quantization)}) - - blob, err := GetBlobsPath(baseLayer.Digest) - if err != nil { - return err - } - - temp, err := os.CreateTemp(filepath.Dir(blob), quantization) - if err != nil { - return err - } - defer temp.Close() - defer os.Remove(temp.Name()) - - if err := llama.Quantize(blob, temp.Name(), uint32(want)); err != nil { - return err - } - - layer, err := NewLayer(temp, baseLayer.MediaType) - if err != nil { - return err - } - - if _, err := temp.Seek(0, io.SeekStart); err != nil { - return err - } - - ggml, _, err := llm.DecodeGGML(temp, 0) - if err != nil { - return err - } - - baseLayer.Layer = layer - baseLayer.GGML = ggml - } - } - - if baseLayer.GGML != nil { - config.ModelFormat = cmp.Or(config.ModelFormat, baseLayer.GGML.Name()) - config.ModelFamily = cmp.Or(config.ModelFamily, baseLayer.GGML.KV().Architecture()) - config.ModelType = cmp.Or(config.ModelType, format.HumanNumber(baseLayer.GGML.KV().ParameterCount())) - config.FileType = cmp.Or(config.FileType, baseLayer.GGML.KV().FileType().String()) - config.ModelFamilies = append(config.ModelFamilies, baseLayer.GGML.KV().Architecture()) - } - - layers = append(layers, baseLayer.Layer) - } - case "license", "template", "system": - if c.Name == "template" { - if _, err := template.Parse(c.Args); err != nil { - return fmt.Errorf("%w: %s", errBadTemplate, err) - } - } - - if c.Name != "license" { - // replace - layers = slices.DeleteFunc(layers, func(layer Layer) bool { - if layer.MediaType != mediatype { - return false - } - - if err := layer.Remove(); err != nil { - return false - } - - return true - }) - } - - blob := strings.NewReader(c.Args) - layer, err := NewLayer(blob, mediatype) - if err != nil { - return err - } - - layers = append(layers, layer) - case "message": - role, content, ok := strings.Cut(c.Args, ": ") - if !ok { - return fmt.Errorf("invalid message: %s", c.Args) - } - - messages = append(messages, &api.Message{Role: role, Content: content}) - default: - ps, err := api.FormatParams(map[string][]string{c.Name: {c.Args}}) - if err != nil { - return err - } - - for k, v := range ps { - if ks, ok := parameters[k].([]string); ok { - parameters[k] = append(ks, v.([]string)...) - } else if vs, ok := v.([]string); ok { - parameters[k] = vs - } else { - parameters[k] = v - } - } - } - } - - var err2 error - layers = slices.DeleteFunc(layers, func(layer Layer) bool { - switch layer.MediaType { - case "application/vnd.ollama.image.message": - // if there are new messages, remove the inherited ones - if len(messages) > 0 { - return true - } - - return false - case "application/vnd.ollama.image.params": - // merge inherited parameters with new ones - r, err := layer.Open() - if err != nil { - err2 = err - return false - } - defer r.Close() - - var ps map[string]any - if err := json.NewDecoder(r).Decode(&ps); err != nil { - err2 = err - return false - } - - for k, v := range ps { - if _, ok := parameters[k]; !ok { - parameters[k] = v - } - } - - return true - default: - return false - } - }) - - if err2 != nil { - return err2 - } - - if len(messages) > 0 { - var b bytes.Buffer - if err := json.NewEncoder(&b).Encode(messages); err != nil { - return err - } - - layer, err := NewLayer(&b, "application/vnd.ollama.image.messages") - if err != nil { - return err - } - - layers = append(layers, layer) - } - - if len(parameters) > 0 { - var b bytes.Buffer - if err := json.NewEncoder(&b).Encode(parameters); err != nil { - return err - } - - layer, err := NewLayer(&b, "application/vnd.ollama.image.params") - if err != nil { - return err - } - - layers = append(layers, layer) - } - - digests := make([]string, len(layers)) - for i, layer := range layers { - digests[i] = layer.Digest - } - - config.RootFS.DiffIDs = digests - - var b bytes.Buffer - if err := json.NewEncoder(&b).Encode(config); err != nil { - return err - } - - configLayer, err := NewLayer(&b, "application/vnd.docker.container.image.v1+json") - if err != nil { - return err - } - - for _, layer := range append(layers, configLayer) { - if layer.status != "" { - fn(api.ProgressResponse{Status: layer.status}) - } - } - - old, _ := ParseNamedManifest(name) - - fn(api.ProgressResponse{Status: "writing manifest"}) - if err := WriteManifest(name, configLayer, layers); err != nil { - return err - } - - if !envconfig.NoPrune() && old != nil { - if err := old.RemoveLayers(); err != nil { - return err - } - } - - fn(api.ProgressResponse{Status: "success"}) - return nil -} - func CopyModel(src, dst model.Name) error { if !dst.IsFullyQualified() { return model.Unqualified(dst) diff --git a/server/model.go b/server/model.go index 4926d6ce2..2c43e6179 100644 --- a/server/model.go +++ b/server/model.go @@ -1,7 +1,6 @@ package server import ( - "archive/zip" "bytes" "context" "encoding/json" @@ -11,13 +10,11 @@ import ( "log/slog" "net/http" "os" - "path/filepath" "slices" "strings" "text/template/parse" "github.com/ollama/ollama/api" - "github.com/ollama/ollama/convert" "github.com/ollama/ollama/llm" "github.com/ollama/ollama/template" "github.com/ollama/ollama/types/model" @@ -81,148 +78,6 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe return layers, nil } -func parseFromZipFile(_ context.Context, command string, baseLayers []*layerGGML, f *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) { - fi, err := f.Stat() - if err != nil { - return nil, err - } - - r, err := zip.NewReader(f, fi.Size()) - if err != nil { - return nil, err - } - - p, err := os.MkdirTemp(filepath.Dir(f.Name()), "") - if err != nil { - return nil, err - } - defer os.RemoveAll(p) - - fn(api.ProgressResponse{Status: "converting model"}) - // TODO(mxyng): this should write directly into a layer - // e.g. NewLayer(arch.Reader(), "application/vnd.ollama.image.model") - t, err := os.CreateTemp(p, "fp16") - if err != nil { - return nil, err - } - defer t.Close() - defer os.Remove(t.Name()) - - var layerType string - - switch command { - case "adapter": - var baseModel *llm.GGML - for _, l := range baseLayers { - if l.GGML != nil { - baseModel = l.GGML - break - } - } - - if baseModel == nil { - return nil, fmt.Errorf("no base model specified for the adapter") - } - - if err := convert.ConvertAdapter(convert.NewZipReader(r, p, 32<<20), t, baseModel.KV()); err != nil { - return nil, err - } - layerType = "application/vnd.ollama.image.adapter" - case "model": - if err := convert.ConvertModel(convert.NewZipReader(r, p, 32<<20), t); err != nil { - return nil, err - } - layerType = "application/vnd.ollama.image.model" - } - - if _, err := t.Seek(0, io.SeekStart); err != nil { - return nil, err - } - - layer, err := NewLayer(t, layerType) - if err != nil { - return nil, err - } - - bin, err := layer.Open() - if err != nil { - return nil, err - } - defer bin.Close() - - ggml, _, err := llm.DecodeGGML(bin, 0) - if err != nil { - return nil, err - } - - layers = append(layers, &layerGGML{layer, ggml}) - - intermediateBlobs[digest] = layer.Digest - return detectChatTemplate(layers) -} - -func parseFromFile(ctx context.Context, command string, baseLayers []*layerGGML, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) { - sr := io.NewSectionReader(file, 0, 512) - contentType, err := detectContentType(sr) - if err != nil { - return nil, err - } - - switch contentType { - case "gguf", "ggla": - // noop - case "application/zip": - return parseFromZipFile(ctx, command, baseLayers, file, digest, fn) - default: - return nil, fmt.Errorf("unsupported content type: %s", contentType) - } - - stat, err := file.Stat() - if err != nil { - return nil, err - } - - var offset int64 - for offset < stat.Size() { - ggml, n, err := llm.DecodeGGML(file, 0) - if errors.Is(err, io.EOF) { - break - } else if err != nil { - return nil, err - } - - mediatype := "application/vnd.ollama.image.model" - if ggml.Name() == "ggla" || ggml.KV().Kind() == "adapter" { - mediatype = "application/vnd.ollama.image.adapter" - } - - if _, ok := ggml.KV()[fmt.Sprintf("%s.vision.block_count", ggml.KV().Architecture())]; ok || ggml.KV().Kind() == "projector" { - mediatype = "application/vnd.ollama.image.projector" - } - - var layer Layer - if digest != "" && n == stat.Size() && offset == 0 { - layer, err = NewLayerFromLayer(digest, mediatype, file.Name()) - if err != nil { - slog.Debug("could not create new layer from layer", "error", err) - } - } - - // Fallback to creating layer from file copy (either NewLayerFromLayer failed, or digest empty/n != stat.Size()) - if layer.Digest == "" { - layer, err = NewLayer(io.NewSectionReader(file, offset, n), mediatype) - if err != nil { - return nil, err - } - } - - layers = append(layers, &layerGGML{layer, ggml}) - offset = n - } - - return detectChatTemplate(layers) -} - func detectChatTemplate(layers []*layerGGML) ([]*layerGGML, error) { for _, layer := range layers { if s := layer.GGML.KV().ChatTemplate(); s != "" { diff --git a/server/model_test.go b/server/model_test.go index 47c4728ed..e5c2f2bb2 100644 --- a/server/model_test.go +++ b/server/model_test.go @@ -2,10 +2,8 @@ package server import ( "bytes" - "context" "encoding/json" "fmt" - "io" "os" "path/filepath" "testing" @@ -13,7 +11,6 @@ import ( "github.com/google/go-cmp/cmp" "github.com/ollama/ollama/api" - "github.com/ollama/ollama/llm" "github.com/ollama/ollama/template" ) @@ -139,87 +136,6 @@ The temperature in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.`, } } -func TestParseFromFileFromLayer(t *testing.T) { - tempModels := t.TempDir() - t.Setenv("OLLAMA_MODELS", tempModels) - - file, err := os.CreateTemp(tempModels, "") - if err != nil { - t.Fatalf("failed to open file: %v", err) - } - defer file.Close() - if err := llm.WriteGGUF(file, llm.KV{"general.architecture": "gemma"}, []llm.Tensor{}); err != nil { - t.Fatalf("failed to write gguf: %v", err) - } - - if _, err := file.Seek(0, io.SeekStart); err != nil { - t.Fatalf("failed to seek to start: %v", err) - } - - layers, err := parseFromFile(context.Background(), "model", []*layerGGML{}, file, "", func(api.ProgressResponse) {}) - if err != nil { - t.Fatalf("failed to parse from file: %v", err) - } - - if len(layers) != 1 { - t.Fatalf("got %d != want 1", len(layers)) - } - - if _, err := file.Seek(0, io.SeekStart); err != nil { - t.Fatalf("failed to seek to start: %v", err) - } - - layers2, err := parseFromFile(context.Background(), "model", []*layerGGML{}, file, layers[0].Digest, func(api.ProgressResponse) {}) - if err != nil { - t.Fatalf("failed to parse from file: %v", err) - } - if len(layers2) != 1 { - t.Fatalf("got %d != want 1", len(layers2)) - } - - if layers[0].Digest != layers2[0].Digest { - t.Fatalf("got %s != want %s", layers[0].Digest, layers2[0].Digest) - } - - if layers[0].Size != layers2[0].Size { - t.Fatalf("got %d != want %d", layers[0].Size, layers2[0].Size) - } - - if layers[0].MediaType != layers2[0].MediaType { - t.Fatalf("got %v != want %v", layers[0].MediaType, layers2[0].MediaType) - } -} - -func TestParseLayerFromCopy(t *testing.T) { - tempModels := t.TempDir() - t.Setenv("OLLAMA_MODELS", tempModels) - - file2, err := os.CreateTemp(tempModels, "") - if err != nil { - t.Fatalf("failed to open file: %v", err) - } - defer file2.Close() - - for range 5 { - if err := llm.WriteGGUF(file2, llm.KV{"general.architecture": "gemma"}, []llm.Tensor{}); err != nil { - t.Fatalf("failed to write gguf: %v", err) - } - } - - if _, err := file2.Seek(0, io.SeekStart); err != nil { - t.Fatalf("failed to seek to start: %v", err) - } - - layers, err := parseFromFile(context.Background(), "model", []*layerGGML{}, file2, "", func(api.ProgressResponse) {}) - if err != nil { - t.Fatalf("failed to parse from file: %v", err) - } - - if len(layers) != 5 { - t.Fatalf("got %d != want 5", len(layers)) - } -} - func TestParseObjects(t *testing.T) { tests := []struct { input string diff --git a/server/routes.go b/server/routes.go index f3b78927c..543b823d6 100644 --- a/server/routes.go +++ b/server/routes.go @@ -33,7 +33,6 @@ import ( "github.com/ollama/ollama/llm" "github.com/ollama/ollama/model/mllama" "github.com/ollama/ollama/openai" - "github.com/ollama/ollama/parser" "github.com/ollama/ollama/runners" "github.com/ollama/ollama/template" "github.com/ollama/ollama/types/errtypes" @@ -688,77 +687,6 @@ func getExistingName(n model.Name) (model.Name, error) { return n, nil } -func (s *Server) CreateHandler(c *gin.Context) { - var r api.CreateRequest - if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) { - c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"}) - return - } else if err != nil { - c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()}) - return - } - - name := model.ParseName(cmp.Or(r.Model, r.Name)) - if !name.IsValid() { - c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": errtypes.InvalidModelNameErrMsg}) - return - } - - name, err := getExistingName(name) - if err != nil { - c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()}) - return - } - - if r.Path == "" && r.Modelfile == "" { - c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "path or Modelfile are required"}) - return - } - - var sr io.Reader = strings.NewReader(r.Modelfile) - if r.Path != "" && r.Modelfile == "" { - f, err := os.Open(r.Path) - if err != nil { - c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("error reading modelfile: %s", err)}) - return - } - defer f.Close() - - sr = f - } - - f, err := parser.ParseFile(sr) - if err != nil { - c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()}) - return - } - - ch := make(chan any) - go func() { - defer close(ch) - fn := func(resp api.ProgressResponse) { - ch <- resp - } - - ctx, cancel := context.WithCancel(c.Request.Context()) - defer cancel() - - quantization := cmp.Or(r.Quantize, r.Quantization) - if err := CreateModel(ctx, name, filepath.Dir(r.Path), strings.ToUpper(quantization), f, fn); errors.Is(err, errBadTemplate) { - ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest} - } else if err != nil { - ch <- gin.H{"error": err.Error()} - } - }() - - if r.Stream != nil && !*r.Stream { - waitForStream(c, ch) - return - } - - streamResponse(c, ch) -} - func (s *Server) DeleteHandler(c *gin.Context) { var r api.DeleteRequest if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) { diff --git a/server/routes_create_test.go b/server/routes_create_test.go index 09521753f..9c85eb9d5 100644 --- a/server/routes_create_test.go +++ b/server/routes_create_test.go @@ -11,18 +11,23 @@ import ( "os" "path/filepath" "slices" + "strings" "testing" "github.com/gin-gonic/gin" "github.com/ollama/ollama/api" + "github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/llm" ) var stream bool = false -func createBinFile(t *testing.T, kv map[string]any, ti []llm.Tensor) string { +func createBinFile(t *testing.T, kv map[string]any, ti []llm.Tensor) (string, string) { t.Helper() + t.Setenv("OLLAMA_MODELS", cmp.Or(os.Getenv("OLLAMA_MODELS"), t.TempDir())) + + modelDir := envconfig.Models() f, err := os.CreateTemp(t.TempDir(), "") if err != nil { @@ -33,8 +38,21 @@ func createBinFile(t *testing.T, kv map[string]any, ti []llm.Tensor) string { if err := llm.WriteGGUF(f, kv, ti); err != nil { t.Fatal(err) } + // Calculate sha256 of file + if _, err := f.Seek(0, 0); err != nil { + t.Fatal(err) + } - return f.Name() + digest, _ := GetSHA256Digest(f) + if err := f.Close(); err != nil { + t.Fatal(err) + } + + if err := createLink(f.Name(), filepath.Join(modelDir, "blobs", fmt.Sprintf("sha256-%s", strings.TrimPrefix(digest, "sha256:")))); err != nil { + t.Fatal(err) + } + + return f.Name(), digest } type responseRecorder struct { @@ -93,13 +111,17 @@ func TestCreateFromBin(t *testing.T) { t.Setenv("OLLAMA_MODELS", p) var s Server + + _, digest := createBinFile(t, nil, nil) + w := createRequest(t, s.CreateHandler, api.CreateRequest{ - Name: "test", - Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)), - Stream: &stream, + Name: "test", + Files: map[string]string{"test.gguf": digest}, + Stream: &stream, }) if w.Code != http.StatusOK { + fmt.Println(w) t.Fatalf("expected status code 200, actual %d", w.Code) } @@ -120,10 +142,12 @@ func TestCreateFromModel(t *testing.T) { t.Setenv("OLLAMA_MODELS", p) var s Server + _, digest := createBinFile(t, nil, nil) + w := createRequest(t, s.CreateHandler, api.CreateRequest{ - Name: "test", - Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)), - Stream: &stream, + Name: "test", + Files: map[string]string{"test.gguf": digest}, + Stream: &stream, }) if w.Code != http.StatusOK { @@ -135,9 +159,9 @@ func TestCreateFromModel(t *testing.T) { }) w = createRequest(t, s.CreateHandler, api.CreateRequest{ - Name: "test2", - Modelfile: "FROM test", - Stream: &stream, + Name: "test2", + From: "test", + Stream: &stream, }) if w.Code != http.StatusOK { @@ -162,10 +186,12 @@ func TestCreateRemovesLayers(t *testing.T) { t.Setenv("OLLAMA_MODELS", p) var s Server + _, digest := createBinFile(t, nil, nil) w := createRequest(t, s.CreateHandler, api.CreateRequest{ - Name: "test", - Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt }}", createBinFile(t, nil, nil)), - Stream: &stream, + Name: "test", + Files: map[string]string{"test.gguf": digest}, + Template: "{{ .Prompt }}", + Stream: &stream, }) if w.Code != http.StatusOK { @@ -183,9 +209,10 @@ func TestCreateRemovesLayers(t *testing.T) { }) w = createRequest(t, s.CreateHandler, api.CreateRequest{ - Name: "test", - Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .System }} {{ .Prompt }}", createBinFile(t, nil, nil)), - Stream: &stream, + Name: "test", + Files: map[string]string{"test.gguf": digest}, + Template: "{{ .System }} {{ .Prompt }}", + Stream: &stream, }) if w.Code != http.StatusOK { @@ -210,10 +237,12 @@ func TestCreateUnsetsSystem(t *testing.T) { t.Setenv("OLLAMA_MODELS", p) var s Server + _, digest := createBinFile(t, nil, nil) w := createRequest(t, s.CreateHandler, api.CreateRequest{ - Name: "test", - Modelfile: fmt.Sprintf("FROM %s\nSYSTEM Say hi!", createBinFile(t, nil, nil)), - Stream: &stream, + Name: "test", + Files: map[string]string{"test.gguf": digest}, + System: "Say hi!", + Stream: &stream, }) if w.Code != http.StatusOK { @@ -231,9 +260,10 @@ func TestCreateUnsetsSystem(t *testing.T) { }) w = createRequest(t, s.CreateHandler, api.CreateRequest{ - Name: "test", - Modelfile: fmt.Sprintf("FROM %s\nSYSTEM \"\"", createBinFile(t, nil, nil)), - Stream: &stream, + Name: "test", + Files: map[string]string{"test.gguf": digest}, + System: "", + Stream: &stream, }) if w.Code != http.StatusOK { @@ -245,19 +275,9 @@ func TestCreateUnsetsSystem(t *testing.T) { }) checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{ - filepath.Join(p, "blobs", "sha256-67d4b8d106af2a5b100a46e9bdc038c71eef2a35c9abac784092654212f97cf5"), filepath.Join(p, "blobs", "sha256-a4e5e156ddec27e286f75328784d7106b60a4eb1d246e950a001a3f944fbda99"), - filepath.Join(p, "blobs", "sha256-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"), + filepath.Join(p, "blobs", "sha256-ca239d7bd8ea90e4a5d2e6bf88f8d74a47b14336e73eb4e18bed4dd325018116"), }) - - bts, err := os.ReadFile(filepath.Join(p, "blobs", "sha256-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855")) - if err != nil { - t.Fatal(err) - } - - if string(bts) != "" { - t.Fatalf("expected empty string, actual %s", string(bts)) - } } func TestCreateMergeParameters(t *testing.T) { @@ -267,10 +287,16 @@ func TestCreateMergeParameters(t *testing.T) { t.Setenv("OLLAMA_MODELS", p) var s Server + _, digest := createBinFile(t, nil, nil) w := createRequest(t, s.CreateHandler, api.CreateRequest{ - Name: "test", - Modelfile: fmt.Sprintf("FROM %s\nPARAMETER temperature 1\nPARAMETER top_k 10\nPARAMETER stop USER:\nPARAMETER stop ASSISTANT:", createBinFile(t, nil, nil)), - Stream: &stream, + Name: "test", + Files: map[string]string{"test.gguf": digest}, + Parameters: map[string]any{ + "temperature": 1, + "top_k": 10, + "stop": []string{"USER:", "ASSISTANT:"}, + }, + Stream: &stream, }) if w.Code != http.StatusOK { @@ -289,9 +315,13 @@ func TestCreateMergeParameters(t *testing.T) { // in order to merge parameters, the second model must be created FROM the first w = createRequest(t, s.CreateHandler, api.CreateRequest{ - Name: "test2", - Modelfile: "FROM test\nPARAMETER temperature 0.6\nPARAMETER top_p 0.7", - Stream: &stream, + Name: "test2", + From: "test", + Parameters: map[string]any{ + "temperature": 0.6, + "top_p": 0.7, + }, + Stream: &stream, }) if w.Code != http.StatusOK { @@ -303,6 +333,22 @@ func TestCreateMergeParameters(t *testing.T) { filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test2", "latest"), }) + // Display contents of each blob in the directory + blobDir := filepath.Join(p, "blobs") + entries, err := os.ReadDir(blobDir) + if err != nil { + t.Fatalf("failed to read blobs directory: %v", err) + } + + for _, entry := range entries { + blobPath := filepath.Join(blobDir, entry.Name()) + content, err := os.ReadFile(blobPath) + if err != nil { + t.Fatalf("failed to read blob %s: %v", entry.Name(), err) + } + t.Logf("Contents of %s:\n%s", entry.Name(), string(content)) + } + checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{ filepath.Join(p, "blobs", "sha256-1d0ad71299d48c2fb7ae2b98e683643e771f8a5b72be34942af90d97a91c1e37"), filepath.Join(p, "blobs", "sha256-4a384beaf47a9cbe452dfa5ab70eea691790f3b35a832d12933a1996685bf2b6"), @@ -327,9 +373,14 @@ func TestCreateMergeParameters(t *testing.T) { // slices are replaced w = createRequest(t, s.CreateHandler, api.CreateRequest{ - Name: "test2", - Modelfile: "FROM test\nPARAMETER temperature 0.6\nPARAMETER top_p 0.7\nPARAMETER stop <|endoftext|>", - Stream: &stream, + Name: "test2", + From: "test", + Parameters: map[string]any{ + "temperature": 0.6, + "top_p": 0.7, + "stop": []string{"<|endoftext|>"}, + }, + Stream: &stream, }) if w.Code != http.StatusOK { @@ -371,10 +422,25 @@ func TestCreateReplacesMessages(t *testing.T) { t.Setenv("OLLAMA_MODELS", p) var s Server + _, digest := createBinFile(t, nil, nil) w := createRequest(t, s.CreateHandler, api.CreateRequest{ - Name: "test", - Modelfile: fmt.Sprintf("FROM %s\nMESSAGE assistant \"What is my purpose?\"\nMESSAGE user \"You run tests.\"\nMESSAGE assistant \"Oh, my god.\"", createBinFile(t, nil, nil)), - Stream: &stream, + Name: "test", + Files: map[string]string{"test.gguf": digest}, + Messages: []api.Message{ + { + Role: "assistant", + Content: "What is my purpose?", + }, + { + Role: "user", + Content: "You run tests.", + }, + { + Role: "assistant", + Content: "Oh, my god.", + }, + }, + Stream: &stream, }) if w.Code != http.StatusOK { @@ -392,9 +458,23 @@ func TestCreateReplacesMessages(t *testing.T) { }) w = createRequest(t, s.CreateHandler, api.CreateRequest{ - Name: "test2", - Modelfile: "FROM test\nMESSAGE assistant \"You're a test, Harry.\"\nMESSAGE user \"I-I'm a what?\"\nMESSAGE assistant \"A test. And a thumping good one at that, I'd wager.\"", - Stream: &stream, + Name: "test2", + From: "test", + Messages: []api.Message{ + { + Role: "assistant", + Content: "You're a test, Harry.", + }, + { + Role: "user", + Content: "I-I'm a what?", + }, + { + Role: "assistant", + Content: "A test. And a thumping good one at that, I'd wager.", + }, + }, + Stream: &stream, }) if w.Code != http.StatusOK { @@ -406,12 +486,13 @@ func TestCreateReplacesMessages(t *testing.T) { filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test2", "latest"), }) + // Old layers will not have been pruned checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{ filepath.Join(p, "blobs", "sha256-298baeaf6928a60cf666d88d64a1ba606feb43a2865687c39e40652e407bffc4"), - filepath.Join(p, "blobs", "sha256-4f48b25fe9969564c82f58eb1cedbdff6484cc0baf474bc6c2a9b37c8da3362a"), filepath.Join(p, "blobs", "sha256-a4e5e156ddec27e286f75328784d7106b60a4eb1d246e950a001a3f944fbda99"), filepath.Join(p, "blobs", "sha256-a60ecc9da299ec7ede453f99236e5577fd125e143689b646d9f0ddc9971bf4db"), filepath.Join(p, "blobs", "sha256-e0e27d47045063ccb167ae852c51d49a98eab33fabaee4633fdddf97213e40b5"), + filepath.Join(p, "blobs", "sha256-f4e2c3690efef1b4b63ba1e1b2744ffeb6a7438a0110b86596069f6d9999c80b"), }) type message struct { @@ -448,10 +529,13 @@ func TestCreateTemplateSystem(t *testing.T) { t.Setenv("OLLAMA_MODELS", p) var s Server + _, digest := createBinFile(t, nil, nil) w := createRequest(t, s.CreateHandler, api.CreateRequest{ - Name: "test", - Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt }}\nSYSTEM Say hello!\nTEMPLATE {{ .System }} {{ .Prompt }}\nSYSTEM Say bye!", createBinFile(t, nil, nil)), - Stream: &stream, + Name: "test", + Files: map[string]string{"test.gguf": digest}, + Template: "{{ .System }} {{ .Prompt }}", + System: "Say bye!", + Stream: &stream, }) if w.Code != http.StatusOK { @@ -488,10 +572,12 @@ func TestCreateTemplateSystem(t *testing.T) { } t.Run("incomplete template", func(t *testing.T) { + _, digest := createBinFile(t, nil, nil) w := createRequest(t, s.CreateHandler, api.CreateRequest{ - Name: "test", - Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt", createBinFile(t, nil, nil)), - Stream: &stream, + Name: "test", + Files: map[string]string{"test.gguf": digest}, + Template: "{{ .Prompt", + Stream: &stream, }) if w.Code != http.StatusBadRequest { @@ -500,10 +586,12 @@ func TestCreateTemplateSystem(t *testing.T) { }) t.Run("template with unclosed if", func(t *testing.T) { + _, digest := createBinFile(t, nil, nil) w := createRequest(t, s.CreateHandler, api.CreateRequest{ - Name: "test", - Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ if .Prompt }}", createBinFile(t, nil, nil)), - Stream: &stream, + Name: "test", + Files: map[string]string{"test.gguf": digest}, + Template: "{{ if .Prompt }}", + Stream: &stream, }) if w.Code != http.StatusBadRequest { @@ -512,10 +600,12 @@ func TestCreateTemplateSystem(t *testing.T) { }) t.Run("template with undefined function", func(t *testing.T) { + _, digest := createBinFile(t, nil, nil) w := createRequest(t, s.CreateHandler, api.CreateRequest{ - Name: "test", - Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ Prompt }}", createBinFile(t, nil, nil)), - Stream: &stream, + Name: "test", + Files: map[string]string{"test.gguf": digest}, + Template: "{{ Prompt }}", + Stream: &stream, }) if w.Code != http.StatusBadRequest { @@ -531,10 +621,12 @@ func TestCreateLicenses(t *testing.T) { t.Setenv("OLLAMA_MODELS", p) var s Server + _, digest := createBinFile(t, nil, nil) w := createRequest(t, s.CreateHandler, api.CreateRequest{ - Name: "test", - Modelfile: fmt.Sprintf("FROM %s\nLICENSE MIT\nLICENSE Apache-2.0", createBinFile(t, nil, nil)), - Stream: &stream, + Name: "test", + Files: map[string]string{"test.gguf": digest}, + License: []string{"MIT", "Apache-2.0"}, + Stream: &stream, }) if w.Code != http.StatusOK { @@ -579,11 +671,12 @@ func TestCreateDetectTemplate(t *testing.T) { var s Server t.Run("matched", func(t *testing.T) { + _, digest := createBinFile(t, llm.KV{ + "tokenizer.chat_template": "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}", + }, nil) w := createRequest(t, s.CreateHandler, api.CreateRequest{ - Name: "test", - Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{ - "tokenizer.chat_template": "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}", - }, nil)), + Name: "test", + Files: map[string]string{"test.gguf": digest}, Stream: &stream, }) @@ -600,10 +693,11 @@ func TestCreateDetectTemplate(t *testing.T) { }) t.Run("unmatched", func(t *testing.T) { + _, digest := createBinFile(t, nil, nil) w := createRequest(t, s.CreateHandler, api.CreateRequest{ - Name: "test", - Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)), - Stream: &stream, + Name: "test", + Files: map[string]string{"test.gguf": digest}, + Stream: &stream, }) if w.Code != http.StatusOK { diff --git a/server/routes_delete_test.go b/server/routes_delete_test.go index 5a337e794..2e00c08df 100644 --- a/server/routes_delete_test.go +++ b/server/routes_delete_test.go @@ -3,7 +3,6 @@ package server import ( "bytes" "encoding/json" - "fmt" "net/http" "path/filepath" "testing" @@ -22,9 +21,10 @@ func TestDelete(t *testing.T) { var s Server + _, digest := createBinFile(t, nil, nil) w := createRequest(t, s.CreateHandler, api.CreateRequest{ - Name: "test", - Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)), + Name: "test", + Files: map[string]string{"test.gguf": digest}, }) if w.Code != http.StatusOK { @@ -32,8 +32,9 @@ func TestDelete(t *testing.T) { } w = createRequest(t, s.CreateHandler, api.CreateRequest{ - Name: "test2", - Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .System }} {{ .Prompt }}", createBinFile(t, nil, nil)), + Name: "test2", + Files: map[string]string{"test.gguf": digest}, + Template: "{{ .System }} {{ .Prompt }}", }) if w.Code != http.StatusOK { diff --git a/server/routes_generate_test.go b/server/routes_generate_test.go index 737fa79c1..0900fa2a0 100644 --- a/server/routes_generate_test.go +++ b/server/routes_generate_test.go @@ -4,7 +4,6 @@ import ( "bytes" "context" "encoding/json" - "fmt" "io" "net/http" "strings" @@ -89,10 +88,34 @@ func TestGenerateChat(t *testing.T) { go s.sched.Run(context.TODO()) + _, digest := createBinFile(t, llm.KV{ + "general.architecture": "llama", + "llama.block_count": uint32(1), + "llama.context_length": uint32(8192), + "llama.embedding_length": uint32(4096), + "llama.attention.head_count": uint32(32), + "llama.attention.head_count_kv": uint32(8), + "tokenizer.ggml.tokens": []string{""}, + "tokenizer.ggml.scores": []float32{0}, + "tokenizer.ggml.token_type": []int32{0}, + }, []llm.Tensor{ + {Name: "token_embd.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, + {Name: "blk.0.attn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, + {Name: "blk.0.ffn_down.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, + {Name: "blk.0.ffn_gate.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, + {Name: "blk.0.ffn_up.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, + {Name: "blk.0.ffn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, + {Name: "blk.0.attn_k.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, + {Name: "blk.0.attn_output.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, + {Name: "blk.0.attn_q.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, + {Name: "blk.0.attn_v.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, + {Name: "output.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, + }) + w := createRequest(t, s.CreateHandler, api.CreateRequest{ Model: "test", - Modelfile: fmt.Sprintf(`FROM %s - TEMPLATE """ + Files: map[string]string{"file.gguf": digest}, + Template: ` {{- if .Tools }} {{ .Tools }} {{ end }} @@ -100,30 +123,7 @@ func TestGenerateChat(t *testing.T) { {{- .Role }}: {{ .Content }} {{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}} {{- end }} -{{ end }}""" -`, createBinFile(t, llm.KV{ - "general.architecture": "llama", - "llama.block_count": uint32(1), - "llama.context_length": uint32(8192), - "llama.embedding_length": uint32(4096), - "llama.attention.head_count": uint32(32), - "llama.attention.head_count_kv": uint32(8), - "tokenizer.ggml.tokens": []string{""}, - "tokenizer.ggml.scores": []float32{0}, - "tokenizer.ggml.token_type": []int32{0}, - }, []llm.Tensor{ - {Name: "token_embd.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, - {Name: "blk.0.attn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, - {Name: "blk.0.ffn_down.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, - {Name: "blk.0.ffn_gate.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, - {Name: "blk.0.ffn_up.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, - {Name: "blk.0.ffn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, - {Name: "blk.0.attn_k.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, - {Name: "blk.0.attn_output.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, - {Name: "blk.0.attn_q.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, - {Name: "blk.0.attn_v.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, - {Name: "output.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, - })), +{{ end }}`, Stream: &stream, }) @@ -154,12 +154,13 @@ func TestGenerateChat(t *testing.T) { }) t.Run("missing capabilities chat", func(t *testing.T) { + _, digest := createBinFile(t, llm.KV{ + "general.architecture": "bert", + "bert.pooling_type": uint32(0), + }, []llm.Tensor{}) w := createRequest(t, s.CreateHandler, api.CreateRequest{ - Model: "bert", - Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{ - "general.architecture": "bert", - "bert.pooling_type": uint32(0), - }, []llm.Tensor{})), + Model: "bert", + Files: map[string]string{"bert.gguf": digest}, Stream: &stream, }) @@ -281,8 +282,9 @@ func TestGenerateChat(t *testing.T) { }) w = createRequest(t, s.CreateHandler, api.CreateRequest{ - Model: "test-system", - Modelfile: "FROM test\nSYSTEM You are a helpful assistant.", + Model: "test-system", + From: "test", + System: "You are a helpful assistant.", }) if w.Code != http.StatusOK { @@ -622,36 +624,38 @@ func TestGenerate(t *testing.T) { go s.sched.Run(context.TODO()) + _, digest := createBinFile(t, llm.KV{ + "general.architecture": "llama", + "llama.block_count": uint32(1), + "llama.context_length": uint32(8192), + "llama.embedding_length": uint32(4096), + "llama.attention.head_count": uint32(32), + "llama.attention.head_count_kv": uint32(8), + "tokenizer.ggml.tokens": []string{""}, + "tokenizer.ggml.scores": []float32{0}, + "tokenizer.ggml.token_type": []int32{0}, + }, []llm.Tensor{ + {Name: "token_embd.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, + {Name: "blk.0.attn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, + {Name: "blk.0.ffn_down.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, + {Name: "blk.0.ffn_gate.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, + {Name: "blk.0.ffn_up.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, + {Name: "blk.0.ffn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, + {Name: "blk.0.attn_k.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, + {Name: "blk.0.attn_output.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, + {Name: "blk.0.attn_q.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, + {Name: "blk.0.attn_v.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, + {Name: "output.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, + }) + w := createRequest(t, s.CreateHandler, api.CreateRequest{ Model: "test", - Modelfile: fmt.Sprintf(`FROM %s - TEMPLATE """ + Files: map[string]string{"file.gguf": digest}, + Template: ` {{- if .System }}System: {{ .System }} {{ end }} {{- if .Prompt }}User: {{ .Prompt }} {{ end }} -{{- if .Response }}Assistant: {{ .Response }} {{ end }}""" -`, createBinFile(t, llm.KV{ - "general.architecture": "llama", - "llama.block_count": uint32(1), - "llama.context_length": uint32(8192), - "llama.embedding_length": uint32(4096), - "llama.attention.head_count": uint32(32), - "llama.attention.head_count_kv": uint32(8), - "tokenizer.ggml.tokens": []string{""}, - "tokenizer.ggml.scores": []float32{0}, - "tokenizer.ggml.token_type": []int32{0}, - }, []llm.Tensor{ - {Name: "token_embd.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, - {Name: "blk.0.attn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, - {Name: "blk.0.ffn_down.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, - {Name: "blk.0.ffn_gate.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, - {Name: "blk.0.ffn_up.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, - {Name: "blk.0.ffn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, - {Name: "blk.0.attn_k.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, - {Name: "blk.0.attn_output.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, - {Name: "blk.0.attn_q.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, - {Name: "blk.0.attn_v.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, - {Name: "output.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))}, - })), +{{- if .Response }}Assistant: {{ .Response }} {{ end }} +`, Stream: &stream, }) @@ -682,12 +686,14 @@ func TestGenerate(t *testing.T) { }) t.Run("missing capabilities generate", func(t *testing.T) { + _, digest := createBinFile(t, llm.KV{ + "general.architecture": "bert", + "bert.pooling_type": uint32(0), + }, []llm.Tensor{}) + w := createRequest(t, s.CreateHandler, api.CreateRequest{ - Model: "bert", - Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{ - "general.architecture": "bert", - "bert.pooling_type": uint32(0), - }, []llm.Tensor{})), + Model: "bert", + Files: map[string]string{"file.gguf": digest}, Stream: &stream, }) @@ -824,8 +830,9 @@ func TestGenerate(t *testing.T) { }) w = createRequest(t, s.CreateHandler, api.CreateRequest{ - Model: "test-system", - Modelfile: "FROM test\nSYSTEM You are a helpful assistant.", + Model: "test-system", + From: "test", + System: "You are a helpful assistant.", }) if w.Code != http.StatusOK { @@ -894,10 +901,10 @@ func TestGenerate(t *testing.T) { w = createRequest(t, s.CreateHandler, api.CreateRequest{ Model: "test-suffix", - Modelfile: `FROM test -TEMPLATE """{{- if .Suffix }}
 {{ .Prompt }} {{ .Suffix }} 
+		Template: `{{- if .Suffix }}
 {{ .Prompt }} {{ .Suffix }} 
 {{- else }}{{ .Prompt }}
-{{- end }}"""`,
+{{- end }}`,
+		From: "test",
 	})
 
 	if w.Code != http.StatusOK {
diff --git a/server/routes_list_test.go b/server/routes_list_test.go
index 56b408300..f6e899ad7 100644
--- a/server/routes_list_test.go
+++ b/server/routes_list_test.go
@@ -2,7 +2,6 @@ package server
 
 import (
 	"encoding/json"
-	"fmt"
 	"net/http"
 	"slices"
 	"testing"
@@ -31,9 +30,11 @@ func TestList(t *testing.T) {
 
 	var s Server
 	for _, n := range expectNames {
+		_, digest := createBinFile(t, nil, nil)
+
 		createRequest(t, s.CreateHandler, api.CreateRequest{
-			Name:      n,
-			Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
+			Name:  n,
+			Files: map[string]string{"test.gguf": digest},
 		})
 	}
 
diff --git a/server/routes_test.go b/server/routes_test.go
index bc007714c..2a1643a5f 100644
--- a/server/routes_test.go
+++ b/server/routes_test.go
@@ -23,14 +23,18 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/openai"
-	"github.com/ollama/ollama/parser"
 	"github.com/ollama/ollama/types/model"
 	"github.com/ollama/ollama/version"
 )
 
-func createTestFile(t *testing.T, name string) string {
+func createTestFile(t *testing.T, name string) (string, string) {
 	t.Helper()
 
+	modelDir := os.Getenv("OLLAMA_MODELS")
+	if modelDir == "" {
+		t.Fatalf("OLLAMA_MODELS not specified")
+	}
+
 	f, err := os.CreateTemp(t.TempDir(), name)
 	if err != nil {
 		t.Fatalf("failed to create temp file: %v", err)
@@ -57,7 +61,21 @@ func createTestFile(t *testing.T, name string) string {
 		t.Fatalf("failed to write to file: %v", err)
 	}
 
-	return f.Name()
+	// Calculate sha256 sum of file
+	if _, err := f.Seek(0, 0); err != nil {
+		t.Fatal(err)
+	}
+
+	digest, _ := GetSHA256Digest(f)
+	if err := f.Close(); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := createLink(f.Name(), filepath.Join(modelDir, "blobs", fmt.Sprintf("sha256-%s", strings.TrimPrefix(digest, "sha256:")))); err != nil {
+		t.Fatal(err)
+	}
+
+	return f.Name(), digest
 }
 
 // equalStringSlices checks if two slices of strings are equal.
@@ -85,20 +103,32 @@ func Test_Routes(t *testing.T) {
 	createTestModel := func(t *testing.T, name string) {
 		t.Helper()
 
-		fname := createTestFile(t, "ollama-model")
+		_, digest := createTestFile(t, "ollama-model")
 
-		r := strings.NewReader(fmt.Sprintf("FROM %s\nPARAMETER seed 42\nPARAMETER top_p 0.9\nPARAMETER stop foo\nPARAMETER stop bar", fname))
-		modelfile, err := parser.ParseFile(r)
-		if err != nil {
-			t.Fatalf("failed to parse file: %v", err)
-		}
 		fn := func(resp api.ProgressResponse) {
 			t.Logf("Status: %s", resp.Status)
 		}
-		err = CreateModel(context.TODO(), model.ParseName(name), "", "", modelfile, fn)
+
+		r := api.CreateRequest{
+			Name:  name,
+			Files: map[string]string{"test.gguf": digest},
+			Parameters: map[string]any{
+				"seed":  42,
+				"top_p": 0.9,
+				"stop":  []string{"foo", "bar"},
+			},
+		}
+
+		modelName := model.ParseName(name)
+
+		baseLayers, err := ggufLayers(digest, fn)
 		if err != nil {
 			t.Fatalf("failed to create model: %v", err)
 		}
+
+		if err := createModel(r, modelName, baseLayers, fn); err != nil {
+			t.Fatal(err)
+		}
 	}
 
 	testCases := []testCase{
@@ -301,13 +331,12 @@ func Test_Routes(t *testing.T) {
 			Method: http.MethodPost,
 			Path:   "/api/create",
 			Setup: func(t *testing.T, req *http.Request) {
-				fname := createTestFile(t, "ollama-model")
-
+				_, digest := createTestFile(t, "ollama-model")
 				stream := false
 				createReq := api.CreateRequest{
-					Name:      "t-bone",
-					Modelfile: fmt.Sprintf("FROM %s", fname),
-					Stream:    &stream,
+					Name:   "t-bone",
+					Files:  map[string]string{"test.gguf": digest},
+					Stream: &stream,
 				}
 				jsonData, err := json.Marshal(createReq)
 				if err != nil {
@@ -419,7 +448,10 @@ func Test_Routes(t *testing.T) {
 			},
 		},
 		{
-			Name:   "openai retrieve model handler",
+			Name: "openai retrieve model handler",
+			Setup: func(t *testing.T, req *http.Request) {
+				createTestModel(t, "show-model")
+			},
 			Method: http.MethodGet,
 			Path:   "/v1/models/show-model",
 			Expected: func(t *testing.T, resp *http.Response) {
@@ -571,21 +603,21 @@ func TestManifestCaseSensitivity(t *testing.T) {
 	t.Cleanup(func() { testMakeRequestDialContext = nil })
 
 	t.Logf("creating")
+	_, digest := createBinFile(t, nil, nil)
 	checkOK(createRequest(t, s.CreateHandler, api.CreateRequest{
 		// Start with the stable name, and later use a case-shuffled
 		// version.
-		Name: wantStableName,
-
-		Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
-		Stream:    &stream,
+		Name:   wantStableName,
+		Files:  map[string]string{"test.gguf": digest},
+		Stream: &stream,
 	}))
 	checkManifestList()
 
 	t.Logf("creating (again)")
 	checkOK(createRequest(t, s.CreateHandler, api.CreateRequest{
-		Name:      name(),
-		Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
-		Stream:    &stream,
+		Name:   name(),
+		Files:  map[string]string{"test.gguf": digest},
+		Stream: &stream,
 	}))
 	checkManifestList()
 
@@ -622,13 +654,12 @@ func TestShow(t *testing.T) {
 
 	var s Server
 
+	_, digest1 := createBinFile(t, llm.KV{"general.architecture": "test"}, nil)
+	_, digest2 := createBinFile(t, llm.KV{"general.type": "projector", "general.architecture": "clip"}, nil)
+
 	createRequest(t, s.CreateHandler, api.CreateRequest{
-		Name: "show-model",
-		Modelfile: fmt.Sprintf(
-			"FROM %s\nFROM %s",
-			createBinFile(t, llm.KV{"general.architecture": "test"}, nil),
-			createBinFile(t, llm.KV{"general.type": "projector", "general.architecture": "clip"}, nil),
-		),
+		Name:  "show-model",
+		Files: map[string]string{"model.gguf": digest1, "projector.gguf": digest2},
 	})
 
 	w := createRequest(t, s.ShowHandler, api.ShowRequest{