// Mirror of https://github.com/ollama/ollama.git (synced 2025-03-18 22:01:47 +01:00; 694 lines, 16 KiB, Go).
package server
|
|
|
|
import (
|
|
"bytes"
|
|
"cmp"
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"log/slog"
|
|
"net/http"
|
|
"os"
|
|
"path/filepath"
|
|
"slices"
|
|
"strings"
|
|
|
|
"github.com/gin-gonic/gin"
|
|
|
|
"github.com/ollama/ollama/api"
|
|
"github.com/ollama/ollama/convert"
|
|
"github.com/ollama/ollama/envconfig"
|
|
"github.com/ollama/ollama/format"
|
|
"github.com/ollama/ollama/llama"
|
|
"github.com/ollama/ollama/llm"
|
|
"github.com/ollama/ollama/template"
|
|
"github.com/ollama/ollama/types/errtypes"
|
|
"github.com/ollama/ollama/types/model"
|
|
)
|
|
|
|
// Sentinel errors produced while turning a create request into model layers.
var (
	// errNoFilesProvided is returned when a conversion is requested with an empty file set.
	errNoFilesProvided = errors.New("no files provided to convert")
	// errOnlyOneAdapterSupported is returned when more than one GGUF adapter file is supplied.
	errOnlyOneAdapterSupported = errors.New("only one adapter is currently supported")
	// errOnlyGGUFSupported is returned when a blob's detected content type is not "gguf".
	errOnlyGGUFSupported = errors.New("supplied file was not in GGUF format")
	// errUnknownType is returned when the supplied files match neither safetensors nor GGUF.
	errUnknownType = errors.New("unknown type")
	// errNeitherFromOrFiles is returned when a request sets neither 'from' nor 'files'.
	errNeitherFromOrFiles = errors.New("neither 'from' or 'files' was specified")
)
|
|
|
|
func (s *Server) CreateHandler(c *gin.Context) {
|
|
var r api.CreateRequest
|
|
if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
|
|
return
|
|
} else if err != nil {
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
|
return
|
|
}
|
|
|
|
name := model.ParseName(cmp.Or(r.Model, r.Name))
|
|
if !name.IsValid() {
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": errtypes.InvalidModelNameErrMsg})
|
|
return
|
|
}
|
|
|
|
name, err := getExistingName(name)
|
|
if err != nil {
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
|
return
|
|
}
|
|
|
|
ch := make(chan any)
|
|
go func() {
|
|
defer close(ch)
|
|
fn := func(resp api.ProgressResponse) {
|
|
ch <- resp
|
|
}
|
|
|
|
oldManifest, _ := ParseNamedManifest(name)
|
|
|
|
var baseLayers []*layerGGML
|
|
if r.From != "" {
|
|
slog.Debug("create model from model name")
|
|
fromName := model.ParseName(r.From)
|
|
if !fromName.IsValid() {
|
|
ch <- gin.H{"error": errtypes.InvalidModelNameErrMsg, "status": http.StatusBadRequest}
|
|
return
|
|
}
|
|
|
|
ctx, cancel := context.WithCancel(c.Request.Context())
|
|
defer cancel()
|
|
|
|
baseLayers, err = parseFromModel(ctx, fromName, fn)
|
|
if err != nil {
|
|
ch <- gin.H{"error": err.Error()}
|
|
}
|
|
} else if r.Files != nil {
|
|
baseLayers, err = convertModelFromFiles(r.Files, baseLayers, false, fn)
|
|
if err != nil {
|
|
for _, badReq := range []error{errNoFilesProvided, errOnlyGGUFSupported, errUnknownType} {
|
|
if errors.Is(err, badReq) {
|
|
ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest}
|
|
return
|
|
}
|
|
}
|
|
ch <- gin.H{"error": err.Error()}
|
|
return
|
|
}
|
|
} else {
|
|
ch <- gin.H{"error": errNeitherFromOrFiles.Error(), "status": http.StatusBadRequest}
|
|
return
|
|
}
|
|
|
|
var adapterLayers []*layerGGML
|
|
if r.Adapters != nil {
|
|
adapterLayers, err = convertModelFromFiles(r.Adapters, baseLayers, true, fn)
|
|
if err != nil {
|
|
for _, badReq := range []error{errNoFilesProvided, errOnlyOneAdapterSupported, errOnlyGGUFSupported, errUnknownType} {
|
|
if errors.Is(err, badReq) {
|
|
ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest}
|
|
return
|
|
}
|
|
}
|
|
ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest}
|
|
return
|
|
}
|
|
}
|
|
|
|
if len(adapterLayers) > 0 {
|
|
baseLayers = append(baseLayers, adapterLayers...)
|
|
}
|
|
|
|
if err := createModel(r, name, baseLayers, fn); err != nil {
|
|
if errors.Is(err, errBadTemplate) {
|
|
ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest}
|
|
return
|
|
}
|
|
ch <- gin.H{"error": err.Error()}
|
|
return
|
|
}
|
|
|
|
if !envconfig.NoPrune() && oldManifest != nil {
|
|
if err := oldManifest.RemoveLayers(); err != nil {
|
|
ch <- gin.H{"error": err.Error()}
|
|
}
|
|
}
|
|
|
|
ch <- api.ProgressResponse{Status: "success"}
|
|
}()
|
|
|
|
if r.Stream != nil && !*r.Stream {
|
|
waitForStream(c, ch)
|
|
return
|
|
}
|
|
|
|
streamResponse(c, ch)
|
|
}
|
|
|
|
func convertModelFromFiles(files map[string]string, baseLayers []*layerGGML, isAdapter bool, fn func(resp api.ProgressResponse)) ([]*layerGGML, error) {
|
|
switch detectModelTypeFromFiles(files) {
|
|
case "safetensors":
|
|
layers, err := convertFromSafetensors(files, baseLayers, isAdapter, fn)
|
|
if err != nil {
|
|
slog.Error("error converting from safetensors", "error", err)
|
|
return nil, err
|
|
}
|
|
return layers, nil
|
|
case "gguf":
|
|
if len(files) == 0 {
|
|
return nil, errNoFilesProvided
|
|
} else if len(files) > 1 && isAdapter {
|
|
return nil, errOnlyOneAdapterSupported
|
|
}
|
|
|
|
var digest string
|
|
var allLayers []*layerGGML
|
|
for _, v := range files {
|
|
digest = v
|
|
layers, err := ggufLayers(digest, fn)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
allLayers = append(allLayers, layers...)
|
|
}
|
|
return allLayers, nil
|
|
default:
|
|
return nil, errUnknownType
|
|
}
|
|
}
|
|
|
|
func detectModelTypeFromFiles(files map[string]string) string {
|
|
for fn := range files {
|
|
if strings.HasSuffix(fn, ".safetensors") {
|
|
return "safetensors"
|
|
} else if strings.HasSuffix(fn, ".gguf") {
|
|
return "gguf"
|
|
} else {
|
|
// try to see if we can find a gguf file even without the file extension
|
|
blobPath, err := GetBlobsPath(files[fn])
|
|
if err != nil {
|
|
slog.Error("error getting blobs path", "file", fn)
|
|
return ""
|
|
}
|
|
|
|
f, err := os.Open(blobPath)
|
|
if err != nil {
|
|
slog.Error("error reading file", "error", err)
|
|
return ""
|
|
}
|
|
defer f.Close()
|
|
|
|
buf := make([]byte, 4)
|
|
_, err = f.Read(buf)
|
|
if err != nil {
|
|
slog.Error("error reading file", "error", err)
|
|
return ""
|
|
}
|
|
|
|
ct := llm.DetectGGMLType(buf)
|
|
if ct == "gguf" {
|
|
return "gguf"
|
|
}
|
|
}
|
|
}
|
|
|
|
return ""
|
|
}
|
|
|
|
func convertFromSafetensors(files map[string]string, baseLayers []*layerGGML, isAdapter bool, fn func(resp api.ProgressResponse)) ([]*layerGGML, error) {
|
|
tmpDir, err := os.MkdirTemp("", "ollama-safetensors")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer os.RemoveAll(tmpDir)
|
|
|
|
for fp, digest := range files {
|
|
blobPath, err := GetBlobsPath(digest)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if err := createLink(blobPath, filepath.Join(tmpDir, fp)); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
t, err := os.CreateTemp(tmpDir, "fp16")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer t.Close()
|
|
|
|
var mediaType string
|
|
if !isAdapter {
|
|
fn(api.ProgressResponse{Status: "converting model"})
|
|
mediaType = "application/vnd.ollama.image.model"
|
|
if err := convert.ConvertModel(os.DirFS(tmpDir), t); err != nil {
|
|
return nil, err
|
|
}
|
|
} else {
|
|
kv, err := kvFromLayers(baseLayers)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
fn(api.ProgressResponse{Status: "converting adapter"})
|
|
mediaType = "application/vnd.ollama.image.adapter"
|
|
if err := convert.ConvertAdapter(os.DirFS(tmpDir), t, kv); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
if _, err := t.Seek(0, io.SeekStart); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
layer, err := NewLayer(t, mediaType)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
bin, err := layer.Open()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
ggml, _, err := llm.DecodeGGML(bin, 0)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
layers := []*layerGGML{{layer, ggml}}
|
|
|
|
if !isAdapter {
|
|
return detectChatTemplate(layers)
|
|
}
|
|
return layers, nil
|
|
}
|
|
|
|
func kvFromLayers(baseLayers []*layerGGML) (llm.KV, error) {
|
|
for _, l := range baseLayers {
|
|
if l.GGML != nil {
|
|
return l.KV(), nil
|
|
}
|
|
}
|
|
return llm.KV{}, fmt.Errorf("no base model was found")
|
|
}
|
|
|
|
func createModel(r api.CreateRequest, name model.Name, baseLayers []*layerGGML, fn func(resp api.ProgressResponse)) (err error) {
|
|
config := ConfigV2{
|
|
OS: "linux",
|
|
Architecture: "amd64",
|
|
RootFS: RootFS{
|
|
Type: "layers",
|
|
},
|
|
}
|
|
|
|
var layers []Layer
|
|
for _, layer := range baseLayers {
|
|
if layer.GGML != nil {
|
|
quantType := strings.ToUpper(cmp.Or(r.Quantize, r.Quantization))
|
|
if quantType != "" && layer.GGML.Name() == "gguf" && layer.MediaType == "application/vnd.ollama.image.model" {
|
|
want, err := llm.ParseFileType(quantType)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
ft := layer.GGML.KV().FileType()
|
|
if !slices.Contains([]string{"F16", "F32"}, ft.String()) {
|
|
return errors.New("quantization is only supported for F16 and F32 models")
|
|
} else if ft != want {
|
|
layer, err = quantizeLayer(layer, quantType, fn)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
config.ModelFormat = cmp.Or(config.ModelFormat, layer.GGML.Name())
|
|
config.ModelFamily = cmp.Or(config.ModelFamily, layer.GGML.KV().Architecture())
|
|
config.ModelType = cmp.Or(config.ModelType, format.HumanNumber(layer.GGML.KV().ParameterCount()))
|
|
config.FileType = cmp.Or(config.FileType, layer.GGML.KV().FileType().String())
|
|
config.ModelFamilies = append(config.ModelFamilies, layer.GGML.KV().Architecture())
|
|
}
|
|
layers = append(layers, layer.Layer)
|
|
}
|
|
|
|
if r.Template != "" {
|
|
layers, err = setTemplate(layers, r.Template)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
if r.System != "" {
|
|
layers, err = setSystem(layers, r.System)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
if r.License != nil {
|
|
switch l := r.License.(type) {
|
|
case string:
|
|
if l != "" {
|
|
layers, err = setLicense(layers, l)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
case any:
|
|
var licenses []string
|
|
b, _ := json.Marshal(l) // re-marshal to JSON
|
|
if err := json.Unmarshal(b, &licenses); err != nil {
|
|
return err
|
|
}
|
|
for _, v := range licenses {
|
|
layers, err = setLicense(layers, v)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
default:
|
|
return fmt.Errorf("unknown license type: %T", l)
|
|
}
|
|
}
|
|
|
|
layers, err = setParameters(layers, r.Parameters)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
layers, err = setMessages(layers, r.Messages)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
configLayer, err := createConfigLayer(layers, config)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, layer := range layers {
|
|
if layer.status != "" {
|
|
fn(api.ProgressResponse{Status: layer.status})
|
|
}
|
|
}
|
|
|
|
fn(api.ProgressResponse{Status: "writing manifest"})
|
|
if err := WriteManifest(name, *configLayer, layers); err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func quantizeLayer(layer *layerGGML, quantizeType string, fn func(resp api.ProgressResponse)) (*layerGGML, error) {
|
|
ft := layer.GGML.KV().FileType()
|
|
fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", ft, quantizeType)})
|
|
|
|
want, err := llm.ParseFileType(quantizeType)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
blob, err := GetBlobsPath(layer.Digest)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
temp, err := os.CreateTemp(filepath.Dir(blob), quantizeType)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer temp.Close()
|
|
defer os.Remove(temp.Name())
|
|
|
|
if err := llama.Quantize(blob, temp.Name(), uint32(want)); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
newLayer, err := NewLayer(temp, layer.MediaType)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if _, err := temp.Seek(0, io.SeekStart); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
ggml, _, err := llm.DecodeGGML(temp, 0)
|
|
if err != nil {
|
|
slog.Error(fmt.Sprintf("error decoding ggml: %s\n", err))
|
|
return nil, err
|
|
}
|
|
|
|
return &layerGGML{newLayer, ggml}, nil
|
|
}
|
|
|
|
func ggufLayers(digest string, fn func(resp api.ProgressResponse)) ([]*layerGGML, error) {
|
|
var layers []*layerGGML
|
|
|
|
fn(api.ProgressResponse{Status: "parsing GGUF"})
|
|
blobPath, err := GetBlobsPath(digest)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
blob, err := os.Open(blobPath)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer blob.Close()
|
|
|
|
sr := io.NewSectionReader(blob, 0, 512)
|
|
contentType, err := detectContentType(sr)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if contentType != "gguf" {
|
|
slog.Error(fmt.Sprintf("unsupported content type: %s", contentType))
|
|
return nil, errOnlyGGUFSupported
|
|
}
|
|
|
|
stat, err := blob.Stat()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var offset int64
|
|
for offset < stat.Size() {
|
|
ggml, n, err := llm.DecodeGGML(blob, 0)
|
|
if errors.Is(err, io.EOF) {
|
|
break
|
|
} else if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
mediatype := "application/vnd.ollama.image.model"
|
|
if ggml.KV().Kind() == "adapter" {
|
|
mediatype = "application/vnd.ollama.image.adapter"
|
|
} else if _, ok := ggml.KV()[fmt.Sprintf("%s.vision.block_count", ggml.KV().Architecture())]; ok || ggml.KV().Kind() == "projector" {
|
|
mediatype = "application/vnd.ollama.image.projector"
|
|
}
|
|
|
|
var layer Layer
|
|
if digest != "" && n == stat.Size() && offset == 0 {
|
|
layer, err = NewLayerFromLayer(digest, mediatype, blob.Name())
|
|
if err != nil {
|
|
slog.Debug("could not create new layer from layer", "error", err)
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
// Fallback to creating layer from file copy (either NewLayerFromLayer failed, or digest empty/n != stat.Size())
|
|
if layer.Digest == "" {
|
|
layer, err = NewLayer(io.NewSectionReader(blob, offset, n), mediatype)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
layers = append(layers, &layerGGML{layer, ggml})
|
|
offset = n
|
|
}
|
|
|
|
return detectChatTemplate(layers)
|
|
}
|
|
|
|
func removeLayer(layers []Layer, mediatype string) []Layer {
|
|
return slices.DeleteFunc(layers, func(layer Layer) bool {
|
|
if layer.MediaType != mediatype {
|
|
return false
|
|
}
|
|
|
|
if err := layer.Remove(); err != nil {
|
|
slog.Warn("couldn't remove blob", "digest", layer.Digest, "error", err)
|
|
return true
|
|
}
|
|
|
|
return true
|
|
})
|
|
}
|
|
|
|
func setTemplate(layers []Layer, t string) ([]Layer, error) {
|
|
layers = removeLayer(layers, "application/vnd.ollama.image.template")
|
|
if _, err := template.Parse(t); err != nil {
|
|
return nil, fmt.Errorf("%w: %s", errBadTemplate, err)
|
|
}
|
|
if _, err := template.Parse(t); err != nil {
|
|
return nil, fmt.Errorf("%w: %s", errBadTemplate, err)
|
|
}
|
|
|
|
blob := strings.NewReader(t)
|
|
layer, err := NewLayer(blob, "application/vnd.ollama.image.template")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
layers = append(layers, layer)
|
|
return layers, nil
|
|
}
|
|
|
|
func setSystem(layers []Layer, s string) ([]Layer, error) {
|
|
layers = removeLayer(layers, "application/vnd.ollama.image.system")
|
|
if s != "" {
|
|
blob := strings.NewReader(s)
|
|
layer, err := NewLayer(blob, "application/vnd.ollama.image.system")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
layers = append(layers, layer)
|
|
}
|
|
return layers, nil
|
|
}
|
|
|
|
func setLicense(layers []Layer, l string) ([]Layer, error) {
|
|
blob := strings.NewReader(l)
|
|
layer, err := NewLayer(blob, "application/vnd.ollama.image.license")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
layers = append(layers, layer)
|
|
return layers, nil
|
|
}
|
|
|
|
func setParameters(layers []Layer, p map[string]any) ([]Layer, error) {
|
|
if p == nil {
|
|
p = make(map[string]any)
|
|
}
|
|
for _, layer := range layers {
|
|
if layer.MediaType != "application/vnd.ollama.image.params" {
|
|
continue
|
|
}
|
|
|
|
digestPath, err := GetBlobsPath(layer.Digest)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
fn, err := os.Open(digestPath)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer fn.Close()
|
|
|
|
var existing map[string]any
|
|
if err := json.NewDecoder(fn).Decode(&existing); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
for k, v := range existing {
|
|
if _, exists := p[k]; exists {
|
|
continue
|
|
}
|
|
p[k] = v
|
|
}
|
|
}
|
|
|
|
if len(p) == 0 {
|
|
return layers, nil
|
|
}
|
|
|
|
layers = removeLayer(layers, "application/vnd.ollama.image.params")
|
|
|
|
var b bytes.Buffer
|
|
if err := json.NewEncoder(&b).Encode(p); err != nil {
|
|
return nil, err
|
|
}
|
|
layer, err := NewLayer(&b, "application/vnd.ollama.image.params")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
layers = append(layers, layer)
|
|
return layers, nil
|
|
}
|
|
|
|
func setMessages(layers []Layer, m []api.Message) ([]Layer, error) {
|
|
// this leaves the old messages intact if no new messages were specified
|
|
// which may not be the correct behaviour
|
|
if len(m) == 0 {
|
|
return layers, nil
|
|
}
|
|
|
|
fmt.Printf("removing old messages\n")
|
|
layers = removeLayer(layers, "application/vnd.ollama.image.messages")
|
|
var b bytes.Buffer
|
|
if err := json.NewEncoder(&b).Encode(m); err != nil {
|
|
return nil, err
|
|
}
|
|
layer, err := NewLayer(&b, "application/vnd.ollama.image.messages")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
layers = append(layers, layer)
|
|
return layers, nil
|
|
}
|
|
|
|
func createConfigLayer(layers []Layer, config ConfigV2) (*Layer, error) {
|
|
digests := make([]string, len(layers))
|
|
for i, layer := range layers {
|
|
digests[i] = layer.Digest
|
|
}
|
|
config.RootFS.DiffIDs = digests
|
|
|
|
var b bytes.Buffer
|
|
if err := json.NewEncoder(&b).Encode(config); err != nil {
|
|
return nil, err
|
|
}
|
|
layer, err := NewLayer(&b, "application/vnd.docker.container.image.v1+json")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &layer, nil
|
|
}
|
|
|
|
func createLink(src, dst string) error {
|
|
// make any subdirs for dst
|
|
if err := os.MkdirAll(filepath.Dir(dst), 0o755); err != nil {
|
|
return err
|
|
}
|
|
|
|
_ = os.Remove(dst)
|
|
if err := os.Symlink(src, dst); err != nil {
|
|
if err := copyFile(src, dst); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// copyFile copies the contents of the file at src to dst, creating dst or
// truncating it if it already exists. It returns the first error encountered
// while opening, copying or closing.
func copyFile(src, dst string) error {
	srcFile, err := os.Open(src)
	if err != nil {
		return err
	}
	defer srcFile.Close()

	dstFile, err := os.Create(dst)
	if err != nil {
		return err
	}

	if _, err := io.Copy(dstFile, srcFile); err != nil {
		dstFile.Close()
		return err
	}

	// Closing the destination can report a write error; surface it rather
	// than treating an incomplete copy as a success.
	return dstFile.Close()
}
|