2023-07-03 15:22:44 -04:00
|
|
|
package server
|
|
|
|
|
|
|
|
import (
|
2024-06-17 10:38:55 -07:00
|
|
|
"bytes"
|
2024-04-30 10:55:19 -07:00
|
|
|
"cmp"
|
2023-07-25 17:08:51 -04:00
|
|
|
"context"
|
2024-10-28 13:51:19 -07:00
|
|
|
"encoding/binary"
|
2023-07-06 10:40:11 -07:00
|
|
|
"encoding/json"
|
2023-10-06 16:06:20 -04:00
|
|
|
"errors"
|
2023-07-21 23:02:12 -07:00
|
|
|
"fmt"
|
2023-07-03 15:22:44 -04:00
|
|
|
"io"
|
2024-12-11 15:29:59 -08:00
|
|
|
"io/fs"
|
2024-01-18 10:52:01 -08:00
|
|
|
"log/slog"
|
2024-07-15 12:14:24 -07:00
|
|
|
"math"
|
2023-07-03 15:22:44 -04:00
|
|
|
"net"
|
|
|
|
"net/http"
|
2024-03-08 22:23:47 -08:00
|
|
|
"net/netip"
|
2023-07-07 15:27:43 -04:00
|
|
|
"os"
|
2023-08-30 16:35:03 -04:00
|
|
|
"os/signal"
|
2023-07-14 17:27:14 -07:00
|
|
|
"path/filepath"
|
2024-05-21 21:30:52 -07:00
|
|
|
"slices"
|
2023-07-06 10:40:11 -07:00
|
|
|
"strings"
|
2023-08-30 16:35:03 -04:00
|
|
|
"syscall"
|
2023-07-12 18:18:06 -07:00
|
|
|
"time"
|
2023-07-03 15:22:44 -04:00
|
|
|
|
2023-07-21 18:01:24 -07:00
|
|
|
"github.com/gin-contrib/cors"
|
2023-07-03 15:22:44 -04:00
|
|
|
"github.com/gin-gonic/gin"
|
2024-08-11 11:57:10 -07:00
|
|
|
"golang.org/x/sync/errgroup"
|
2023-07-03 15:22:44 -04:00
|
|
|
|
2024-03-26 13:04:17 -07:00
|
|
|
"github.com/ollama/ollama/api"
|
2024-10-16 17:45:00 -07:00
|
|
|
"github.com/ollama/ollama/discover"
|
2024-05-24 14:57:15 -07:00
|
|
|
"github.com/ollama/ollama/envconfig"
|
2025-02-14 00:31:21 +00:00
|
|
|
"github.com/ollama/ollama/fs/ggml"
|
2024-03-26 13:04:17 -07:00
|
|
|
"github.com/ollama/ollama/llm"
|
2025-02-05 11:16:28 -08:00
|
|
|
"github.com/ollama/ollama/model/models/mllama"
|
2024-03-26 13:04:17 -07:00
|
|
|
"github.com/ollama/ollama/openai"
|
2025-02-27 12:04:53 -08:00
|
|
|
"github.com/ollama/ollama/server/internal/client/ollama"
|
|
|
|
"github.com/ollama/ollama/server/internal/registry"
|
2024-06-10 14:54:42 -07:00
|
|
|
"github.com/ollama/ollama/template"
|
2024-05-13 18:48:28 -07:00
|
|
|
"github.com/ollama/ollama/types/errtypes"
|
2024-04-16 16:22:38 -07:00
|
|
|
"github.com/ollama/ollama/types/model"
|
2024-03-26 13:04:17 -07:00
|
|
|
"github.com/ollama/ollama/version"
|
2023-07-03 15:22:44 -04:00
|
|
|
)
|
|
|
|
|
server/internal/registry: take over pulls from server package (#9485)
This commit replaces the old pull implementation in the server package
with the new, faster, more robust pull implementation in the registry
package.
The new endpoint, and now the remove endpoint too, are behind the
feature gate "client2" enabled only by setting the OLLAMA_EXPERIMENT
environment variable include "client2".
Currently, the progress indication is wired to perform the same as the
previous implementation to avoid making changes to the CLI, and because
the status reports happen at the start of the download, and the end of
the write to disk, the progress indication is not as smooth as it could
be. This is a known issue and will be addressed in a future change.
This implementation may be ~0.5-1.0% slower in rare cases, depending on
network and disk speed, but is generally MUCH faster and more robust
than its predecessor in all other cases.
2025-03-05 14:48:18 -08:00
|
|
|
// experimentEnabled reports whether the named experiment appears in the
// comma-separated OLLAMA_EXPERIMENT environment variable.
func experimentEnabled(name string) bool {
	for _, exp := range strings.Split(os.Getenv("OLLAMA_EXPERIMENT"), ",") {
		if exp == name {
			return true
		}
	}
	return false
}
|
|
|
|
|
|
|
|
var useClient2 = experimentEnabled("client2")
|
|
|
|
|
2023-08-22 09:48:35 -07:00
|
|
|
var mode string = gin.DebugMode
|
|
|
|
|
2023-12-14 16:47:40 -08:00
|
|
|
// Server wires the HTTP API handlers to the model scheduler.
type Server struct {
	addr  net.Addr   // listen address (not read within this chunk; set elsewhere)
	sched *Scheduler // allocates and expires model runners for requests
}
|
|
|
|
|
2023-08-22 09:48:35 -07:00
|
|
|
func init() {
|
|
|
|
switch mode {
|
|
|
|
case gin.DebugMode:
|
|
|
|
case gin.ReleaseMode:
|
|
|
|
case gin.TestMode:
|
|
|
|
default:
|
|
|
|
mode = gin.DebugMode
|
|
|
|
}
|
|
|
|
|
|
|
|
gin.SetMode(mode)
|
|
|
|
}
|
|
|
|
|
2024-08-01 14:52:15 -07:00
|
|
|
var (
	// errRequired marks a missing required field; callers wrap it with the
	// field name, e.g. fmt.Errorf("model %w", errRequired).
	errRequired = errors.New("is required")

	// errBadTemplate marks a prompt template that failed to parse or render.
	errBadTemplate = errors.New("template error")
)
|
2024-06-20 11:00:08 -07:00
|
|
|
|
2024-01-03 12:01:42 -05:00
|
|
|
func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options, error) {
|
|
|
|
opts := api.DefaultOptions()
|
|
|
|
if err := opts.FromMap(model.Options); err != nil {
|
|
|
|
return api.Options{}, err
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := opts.FromMap(requestOpts); err != nil {
|
|
|
|
return api.Options{}, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return opts, nil
|
2023-08-08 15:13:22 -04:00
|
|
|
}
|
|
|
|
|
2024-07-03 09:00:07 -07:00
|
|
|
// scheduleRunner schedules a runner after validating inputs such as capabilities and model options.
// It returns the allocated runner, model instance, and consolidated options if successful and error otherwise.
func (s *Server) scheduleRunner(ctx context.Context, name string, caps []Capability, requestOpts map[string]any, keepAlive *api.Duration) (llm.LlamaServer, *Model, *api.Options, error) {
	if name == "" {
		return nil, nil, nil, fmt.Errorf("model %w", errRequired)
	}

	model, err := GetModel(name)
	if err != nil {
		return nil, nil, nil, err
	}

	// Reject early if the model lacks any capability the caller requires
	// (e.g. completion, insert).
	if err := model.CheckCapabilities(caps...); err != nil {
		return nil, nil, nil, fmt.Errorf("%s %w", name, err)
	}

	// Merge defaults, model-level options, and request options
	// (request options take precedence).
	opts, err := modelOptions(model, requestOpts)
	if err != nil {
		return nil, nil, nil, err
	}

	// Block until the scheduler either hands us a runner or reports an
	// error. NOTE(review): cancellation is presumably surfaced via errCh by
	// the scheduler — confirm in Scheduler.GetRunner.
	runnerCh, errCh := s.sched.GetRunner(ctx, model, opts, keepAlive)
	var runner *runnerRef
	select {
	case runner = <-runnerCh:
	case err = <-errCh:
		return nil, nil, nil, err
	}

	return runner.llama, model, &opts, nil
}
|
|
|
|
|
|
|
|
// GenerateHandler serves /api/generate: it resolves the requested model,
// schedules a runner, renders the final prompt (template, system message,
// images, and the deprecated context field), and streams completion chunks
// to the client — or aggregates them into a single response when the
// request disables streaming.
func (s *Server) GenerateHandler(c *gin.Context) {
	checkpointStart := time.Now()
	var req api.GenerateRequest
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	} else if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	name := model.ParseName(req.Model)
	if !name.IsValid() {
		// Ideally this is "invalid model name" but we're keeping with
		// what the API currently returns until we can change it.
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
		return
	}

	// We cannot currently consolidate this into GetModel because doing so
	// would induce infinite recursion given the current code structure.
	name, err := getExistingName(name)
	if err != nil {
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
		return
	}

	// NOTE(review): this local shadows the imported `model` package, which
	// is no longer referenced below this point.
	model, err := GetModel(name.String())
	if err != nil {
		switch {
		case errors.Is(err, fs.ErrNotExist):
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
		case err.Error() == errtypes.InvalidModelNameErrMsg:
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		default:
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
		return
	}

	// expire the runner: an empty prompt combined with keep_alive == 0 is
	// the API's idiom for "unload this model now".
	if req.Prompt == "" && req.KeepAlive != nil && int(req.KeepAlive.Seconds()) == 0 {
		s.sched.expireRunner(model)

		c.JSON(http.StatusOK, api.GenerateResponse{
			Model:      req.Model,
			CreatedAt:  time.Now().UTC(),
			Response:   "",
			Done:       true,
			DoneReason: "unload",
		})
		return
	}

	// Raw mode sends the prompt verbatim, so template/system/context are
	// contradictory inputs.
	if req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0) {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
		return
	}

	caps := []Capability{CapabilityCompletion}
	// A suffix implies fill-in-the-middle, which requires insert support.
	if req.Suffix != "" {
		caps = append(caps, CapabilityInsert)
	}

	r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive)
	if errors.Is(err, errCapabilityCompletion) {
		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support generate", req.Model)})
		return
	} else if err != nil {
		handleScheduleError(c, req.Model, err)
		return
	}

	checkpointLoaded := time.Now()

	// load the model: an empty prompt (without the unload idiom above)
	// just warms the runner and returns immediately.
	if req.Prompt == "" {
		c.JSON(http.StatusOK, api.GenerateResponse{
			Model:      req.Model,
			CreatedAt:  time.Now().UTC(),
			Done:       true,
			DoneReason: "load",
		})
		return
	}

	isMllama := checkMllamaModelFamily(model)
	if isMllama && len(req.Images) > 1 {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "this model only supports one image: more than one image sent"})
		return
	}

	// mllama models with a projector need images preprocessed into a
	// binary tensor payload plus an aspect-ratio index; everything else
	// passes the raw image bytes through.
	images := make([]llm.ImageData, len(req.Images))
	for i := range req.Images {
		if isMllama && len(model.ProjectorPaths) > 0 {
			data, opts, err := mllama.Preprocess(bytes.NewReader(req.Images[i]))
			if err != nil {
				c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "error processing image"})
				return
			}

			ar, ok := opts["aspectRatioIndex"].(int)
			if !ok {
				c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "error processing image"})
				return
			}

			buf := new(bytes.Buffer)
			err = binary.Write(buf, binary.LittleEndian, data)
			if err != nil {
				c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "error processing image"})
				return
			}

			images[i] = llm.ImageData{ID: i, Data: buf.Bytes(), AspectRatioID: ar}
		} else {
			images[i] = llm.ImageData{ID: i, Data: req.Images[i]}
		}
	}

	prompt := req.Prompt
	if !req.Raw {
		tmpl := m.Template
		// A request-supplied template overrides the model's template.
		if req.Template != "" {
			tmpl, err = template.Parse(req.Template)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}
		}

		var values template.Values
		if req.Suffix != "" {
			// Fill-in-the-middle: render prompt and suffix directly.
			values.Prompt = prompt
			values.Suffix = req.Suffix
		} else {
			// Chat-style rendering: system message, stored model
			// messages (unless a context is supplied), image
			// placeholders, then the user prompt.
			var msgs []api.Message
			if req.System != "" {
				msgs = append(msgs, api.Message{Role: "system", Content: req.System})
			} else if m.System != "" {
				msgs = append(msgs, api.Message{Role: "system", Content: m.System})
			}

			if req.Context == nil {
				msgs = append(msgs, m.Messages...)
			}

			for _, i := range images {
				imgPrompt := ""
				if isMllama {
					imgPrompt = "<|image|>"
				}
				msgs = append(msgs, api.Message{Role: "user", Content: fmt.Sprintf("[img-%d]"+imgPrompt, i.ID)})
			}

			values.Messages = append(msgs, api.Message{Role: "user", Content: req.Prompt})
		}

		var b bytes.Buffer
		if req.Context != nil {
			slog.Warn("the context field is deprecated and will be removed in a future version of Ollama")
			// Rehydrate the deprecated context tokens into text and
			// prepend it to the rendered prompt.
			s, err := r.Detokenize(c.Request.Context(), req.Context)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}
			b.WriteString(s)
		}

		if err := tmpl.Execute(&b, values); err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

		prompt = b.String()
	}

	slog.Debug("generate request", "images", len(images), "prompt", prompt)

	ch := make(chan any)
	go func() {
		// TODO (jmorganca): avoid building the response twice both here and below
		var sb strings.Builder
		defer close(ch)
		if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
			Prompt:  prompt,
			Images:  images,
			Format:  req.Format,
			Options: opts,
		}, func(cr llm.CompletionResponse) {
			res := api.GenerateResponse{
				Model:      req.Model,
				CreatedAt:  time.Now().UTC(),
				Response:   cr.Content,
				Done:       cr.Done,
				DoneReason: cr.DoneReason,
				Metrics: api.Metrics{
					PromptEvalCount:    cr.PromptEvalCount,
					PromptEvalDuration: cr.PromptEvalDuration,
					EvalCount:          cr.EvalCount,
					EvalDuration:       cr.EvalDuration,
				},
			}

			// Accumulate the full response so the final context can be
			// tokenized below. (strings.Builder.WriteString is documented
			// never to return an error, so this branch is defensive.)
			if _, err := sb.WriteString(cr.Content); err != nil {
				ch <- gin.H{"error": err.Error()}
			}

			if cr.Done {
				res.TotalDuration = time.Since(checkpointStart)
				res.LoadDuration = checkpointLoaded.Sub(checkpointStart)

				if !req.Raw {
					// Return the conversation-so-far as tokens for the
					// deprecated context field.
					tokens, err := r.Tokenize(c.Request.Context(), prompt+sb.String())
					if err != nil {
						ch <- gin.H{"error": err.Error()}
						return
					}
					res.Context = tokens
				}
			}

			ch <- res
		}); err != nil {
			ch <- gin.H{"error": err.Error()}
		}
	}()

	// Non-streaming: drain the channel and collapse all chunks into one
	// response.
	if req.Stream != nil && !*req.Stream {
		// NOTE(review): this `r` shadows the runner variable above, which
		// is not used again in this branch.
		var r api.GenerateResponse
		var sb strings.Builder
		for rr := range ch {
			switch t := rr.(type) {
			case api.GenerateResponse:
				sb.WriteString(t.Response)
				r = t
			case gin.H:
				msg, ok := t["error"].(string)
				if !ok {
					msg = "unexpected error format in response"
				}

				c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
				return
			default:
				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response"})
				return
			}
		}

		r.Response = sb.String()
		c.JSON(http.StatusOK, r)
		return
	}

	streamResponse(c, ch)
}
|
|
|
|
|
2024-07-15 12:14:24 -07:00
|
|
|
// EmbedHandler serves /api/embed: it normalizes the input into a slice of
// strings, schedules a runner, optionally truncates each input to the
// model's context length, and computes embeddings concurrently.
func (s *Server) EmbedHandler(c *gin.Context) {
	checkpointStart := time.Now()
	var req api.EmbedRequest
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	// Truncation defaults to on; only an explicit false disables it.
	truncate := true

	if req.Truncate != nil && !*req.Truncate {
		truncate = false
	}

	// Input may be a single string or a list of strings; anything else
	// (other than a missing input) is rejected.
	var input []string

	switch i := req.Input.(type) {
	case string:
		if len(i) > 0 {
			input = append(input, i)
		}
	case []any:
		for _, v := range i {
			if _, ok := v.(string); !ok {
				c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid input type"})
				return
			}
			input = append(input, v.(string))
		}
	default:
		if req.Input != nil {
			c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid input type"})
			return
		}
	}

	name, err := getExistingName(model.ParseName(req.Model))
	if err != nil {
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
		return
	}

	r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), []Capability{}, req.Options, req.KeepAlive)
	if err != nil {
		handleScheduleError(c, req.Model, err)
		return
	}

	checkpointLoaded := time.Now()

	// Empty input (after normalization) just loads the model.
	if len(input) == 0 {
		c.JSON(http.StatusOK, api.EmbedResponse{Model: req.Model, Embeddings: [][]float32{}})
		return
	}

	kvData, _, err := getModelData(m.ModelPath, false)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	var count int
	// NOTE(review): the loop variable `s` shadows the *Server receiver,
	// which is not used in the remainder of this function.
	for i, s := range input {
		tokens, err := r.Tokenize(c.Request.Context(), s)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

		// Clamp to the smaller of the requested and model context sizes;
		// over-long inputs are either truncated (and detokenized back to
		// text) or rejected.
		ctxLen := min(opts.NumCtx, int(kvData.ContextLength()))
		if len(tokens) > ctxLen {
			if !truncate {
				c.JSON(http.StatusBadRequest, gin.H{"error": "input length exceeds maximum context length"})
				return
			}

			tokens = tokens[:ctxLen]
			s, err = r.Detokenize(c.Request.Context(), tokens)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}
		}

		count += len(tokens)

		input[i] = s
	}

	// Compute all embeddings concurrently; each goroutine writes a
	// distinct index, so no further synchronization is needed.
	var g errgroup.Group
	embeddings := make([][]float32, len(input))
	for i, text := range input {
		g.Go(func() error {
			embedding, err := r.Embedding(c.Request.Context(), text)
			if err != nil {
				return err
			}
			embeddings[i] = normalize(embedding)
			return nil
		})
	}

	if err := g.Wait(); err != nil {
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": strings.TrimSpace(err.Error())})
		return
	}

	resp := api.EmbedResponse{
		Model:           req.Model,
		Embeddings:      embeddings,
		TotalDuration:   time.Since(checkpointStart),
		LoadDuration:    checkpointLoaded.Sub(checkpointStart),
		PromptEvalCount: count,
	}
	c.JSON(http.StatusOK, resp)
}
|
|
|
|
|
|
|
|
// normalize scales vec in place to unit (L2) length and returns it.
// A zero vector is returned unchanged (all zeros), and a nil slice is
// returned as-is.
func normalize(vec []float32) []float32 {
	sum := float32(0)
	for _, v := range vec {
		sum += v * v
	}

	// Guard against dividing by zero for an all-zero input.
	var scale float32
	if sum > 0 {
		scale = float32(1.0 / math.Sqrt(float64(sum)))
	}

	for i := range vec {
		vec[i] *= scale
	}

	return vec
}
|
|
|
|
|
2024-03-30 09:50:05 -07:00
|
|
|
func (s *Server) EmbeddingsHandler(c *gin.Context) {
|
2023-12-04 18:01:06 -05:00
|
|
|
var req api.EmbeddingRequest
|
2024-06-17 10:38:55 -07:00
|
|
|
if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
|
2023-12-04 18:01:06 -05:00
|
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
|
|
|
|
return
|
2024-06-17 10:38:55 -07:00
|
|
|
} else if err != nil {
|
2023-12-04 18:01:06 -05:00
|
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2024-12-11 15:29:59 -08:00
|
|
|
name := model.ParseName(req.Model)
|
|
|
|
if !name.IsValid() {
|
|
|
|
c.JSON(http.StatusBadRequest, gin.H{"error": "model is required"})
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
r, _, _, err := s.scheduleRunner(c.Request.Context(), name.String(), []Capability{}, req.Options, req.KeepAlive)
|
2023-12-04 18:01:06 -05:00
|
|
|
if err != nil {
|
2024-06-20 11:00:08 -07:00
|
|
|
handleScheduleError(c, req.Model, err)
|
2023-12-04 18:01:06 -05:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2024-02-29 17:40:56 -08:00
|
|
|
// an empty request loads the model
|
|
|
|
if req.Prompt == "" {
|
|
|
|
c.JSON(http.StatusOK, api.EmbeddingResponse{Embedding: []float64{}})
|
2023-08-08 15:13:22 -04:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2024-08-11 11:57:10 -07:00
|
|
|
embedding, err := r.Embedding(c.Request.Context(), req.Prompt)
|
2023-08-08 15:13:22 -04:00
|
|
|
if err != nil {
|
2025-03-13 11:22:19 -07:00
|
|
|
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": strings.TrimSpace(err.Error())})
|
2023-08-08 15:13:22 -04:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2024-08-11 11:57:10 -07:00
|
|
|
var e []float64
|
|
|
|
for _, v := range embedding {
|
|
|
|
e = append(e, float64(v))
|
2024-07-15 12:14:24 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
resp := api.EmbeddingResponse{
|
2024-08-11 11:57:10 -07:00
|
|
|
Embedding: e,
|
2024-07-15 12:14:24 -07:00
|
|
|
}
|
|
|
|
c.JSON(http.StatusOK, resp)
|
2023-08-08 15:13:22 -04:00
|
|
|
}
|
|
|
|
|
2024-08-26 19:36:11 -07:00
|
|
|
func (s *Server) PullHandler(c *gin.Context) {
|
2023-07-11 11:54:22 -07:00
|
|
|
var req api.PullRequest
|
2023-10-18 16:08:42 -07:00
|
|
|
err := c.ShouldBindJSON(&req)
|
|
|
|
switch {
|
|
|
|
case errors.Is(err, io.EOF):
|
|
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
|
|
|
|
return
|
|
|
|
case err != nil:
|
|
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
2023-07-11 11:54:22 -07:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2024-05-13 15:27:51 -07:00
|
|
|
name := model.ParseName(cmp.Or(req.Model, req.Name))
|
|
|
|
if !name.IsValid() {
|
2024-12-23 23:38:34 +08:00
|
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": errtypes.InvalidModelNameErrMsg})
|
2024-05-13 15:27:51 -07:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
server: allow mixed-case model names on push, pull, cp, and create (#7676)
This change allows for mixed-case model names to be pushed, pulled,
copied, and created, which was previously disallowed because the Ollama
registry was backed by a Docker registry that enforced a naming
convention that disallowed mixed-case names, which is no longer the
case.
This does not break existing, intended, behaviors.
Also, make TestCase test a story of creating, updating, pulling, and
copying a model with case variations, ensuring the model's manifest is
updated correctly, and not duplicated across different files with
different case variations.
2024-11-19 15:05:57 -08:00
|
|
|
name, err = getExistingName(name)
|
|
|
|
if err != nil {
|
2024-05-13 15:27:51 -07:00
|
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
2023-10-18 15:56:34 -07:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2023-07-16 17:02:22 -07:00
|
|
|
ch := make(chan any)
|
|
|
|
go func() {
|
|
|
|
defer close(ch)
|
2023-07-18 18:51:30 -07:00
|
|
|
fn := func(r api.ProgressResponse) {
|
|
|
|
ch <- r
|
2023-07-16 17:02:22 -07:00
|
|
|
}
|
2023-07-18 18:51:30 -07:00
|
|
|
|
2024-02-14 11:29:49 -08:00
|
|
|
regOpts := ®istryOptions{
|
2023-07-21 15:42:19 -07:00
|
|
|
Insecure: req.Insecure,
|
|
|
|
}
|
|
|
|
|
2023-07-25 17:08:51 -04:00
|
|
|
ctx, cancel := context.WithCancel(c.Request.Context())
|
|
|
|
defer cancel()
|
|
|
|
|
2024-05-13 15:27:51 -07:00
|
|
|
if err := PullModel(ctx, name.DisplayShortest(), regOpts, fn); err != nil {
|
2023-07-20 12:12:08 -07:00
|
|
|
ch <- gin.H{"error": err.Error()}
|
2023-07-16 17:02:22 -07:00
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
2023-10-11 12:54:27 -04:00
|
|
|
if req.Stream != nil && !*req.Stream {
|
|
|
|
waitForStream(c, ch)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2023-07-16 17:02:22 -07:00
|
|
|
streamResponse(c, ch)
|
|
|
|
}
|
|
|
|
|
2024-08-26 19:36:11 -07:00
|
|
|
func (s *Server) PushHandler(c *gin.Context) {
|
2023-07-16 17:02:22 -07:00
|
|
|
var req api.PushRequest
|
2023-10-18 16:08:42 -07:00
|
|
|
err := c.ShouldBindJSON(&req)
|
|
|
|
switch {
|
|
|
|
case errors.Is(err, io.EOF):
|
|
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
|
|
|
|
return
|
|
|
|
case err != nil:
|
|
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
2023-07-11 11:54:22 -07:00
|
|
|
return
|
|
|
|
}
|
2023-07-06 10:40:11 -07:00
|
|
|
|
2024-12-11 15:29:59 -08:00
|
|
|
var mname string
|
2024-01-11 14:07:54 -08:00
|
|
|
if req.Model != "" {
|
2024-12-11 15:29:59 -08:00
|
|
|
mname = req.Model
|
2024-01-11 14:07:54 -08:00
|
|
|
} else if req.Name != "" {
|
2024-12-11 15:29:59 -08:00
|
|
|
mname = req.Name
|
2024-01-11 14:07:54 -08:00
|
|
|
} else {
|
|
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
|
2023-10-18 15:56:34 -07:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2023-07-16 17:02:22 -07:00
|
|
|
ch := make(chan any)
|
|
|
|
go func() {
|
|
|
|
defer close(ch)
|
2023-07-18 18:51:30 -07:00
|
|
|
fn := func(r api.ProgressResponse) {
|
|
|
|
ch <- r
|
2023-07-16 17:02:22 -07:00
|
|
|
}
|
2023-07-18 18:51:30 -07:00
|
|
|
|
2024-02-14 11:29:49 -08:00
|
|
|
regOpts := ®istryOptions{
|
2023-07-21 15:42:19 -07:00
|
|
|
Insecure: req.Insecure,
|
|
|
|
}
|
|
|
|
|
2023-10-09 10:24:27 -07:00
|
|
|
ctx, cancel := context.WithCancel(c.Request.Context())
|
|
|
|
defer cancel()
|
|
|
|
|
2024-12-11 15:29:59 -08:00
|
|
|
name, err := getExistingName(model.ParseName(mname))
|
|
|
|
if err != nil {
|
|
|
|
ch <- gin.H{"error": err.Error()}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := PushModel(ctx, name.DisplayShortest(), regOpts, fn); err != nil {
|
2023-07-20 12:12:08 -07:00
|
|
|
ch <- gin.H{"error": err.Error()}
|
2023-07-16 17:02:22 -07:00
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
2023-10-11 12:54:27 -04:00
|
|
|
if req.Stream != nil && !*req.Stream {
|
|
|
|
waitForStream(c, ch)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2023-07-16 17:02:22 -07:00
|
|
|
streamResponse(c, ch)
|
|
|
|
}
|
|
|
|
|
2024-12-11 15:29:59 -08:00
|
|
|
// getExistingName searches the models directory for the longest prefix match of
// the input name and returns the input name with all existing parts replaced
// with each part found. If no parts are found, the input name is returned as
// is.
func getExistingName(n model.Name) (model.Name, error) {
	var zero model.Name
	existing, err := Manifests(true)
	if err != nil {
		return zero, err
	}
	var set model.Name // tracks parts already canonicalized
	// NOTE(review): `set` is never assigned in this loop, so every
	// `set.X == ""` guard below is always true and a later manifest can
	// overwrite a part matched by an earlier one — confirm whether each
	// part should be locked after its first case-insensitive match.
	for e := range existing {
		if set.Host == "" && strings.EqualFold(e.Host, n.Host) {
			n.Host = e.Host
		}
		if set.Namespace == "" && strings.EqualFold(e.Namespace, n.Namespace) {
			n.Namespace = e.Namespace
		}
		if set.Model == "" && strings.EqualFold(e.Model, n.Model) {
			n.Model = e.Model
		}
		if set.Tag == "" && strings.EqualFold(e.Tag, n.Tag) {
			n.Tag = e.Tag
		}
	}
	return n, nil
}
|
|
|
|
|
2024-08-26 19:36:11 -07:00
|
|
|
func (s *Server) DeleteHandler(c *gin.Context) {
|
2024-04-17 17:23:19 -07:00
|
|
|
var r api.DeleteRequest
|
|
|
|
if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
|
2023-10-18 16:08:42 -07:00
|
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
|
|
|
|
return
|
2024-04-17 17:23:19 -07:00
|
|
|
} else if err != nil {
|
2023-10-18 16:08:42 -07:00
|
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
2023-07-20 16:09:23 -07:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2024-04-17 17:23:19 -07:00
|
|
|
n := model.ParseName(cmp.Or(r.Model, r.Name))
|
|
|
|
if !n.IsValid() {
|
|
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("name %q is invalid", cmp.Or(r.Model, r.Name))})
|
2023-07-21 23:02:12 -07:00
|
|
|
return
|
|
|
|
}
|
2023-09-26 17:28:14 -07:00
|
|
|
|
2024-12-11 15:29:59 -08:00
|
|
|
n, err := getExistingName(n)
|
|
|
|
if err != nil {
|
|
|
|
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", cmp.Or(r.Model, r.Name))})
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2024-04-17 17:23:19 -07:00
|
|
|
m, err := ParseNamedManifest(n)
|
2023-09-26 17:28:14 -07:00
|
|
|
if err != nil {
|
2024-10-01 15:45:43 -07:00
|
|
|
switch {
|
|
|
|
case os.IsNotExist(err):
|
|
|
|
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", cmp.Or(r.Model, r.Name))})
|
|
|
|
default:
|
|
|
|
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
|
|
|
}
|
2023-09-26 17:28:14 -07:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2024-04-17 17:23:19 -07:00
|
|
|
if err := m.Remove(); err != nil {
|
2023-09-26 17:28:14 -07:00
|
|
|
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
|
|
|
return
|
|
|
|
}
|
2024-05-08 14:36:08 -07:00
|
|
|
|
|
|
|
if err := m.RemoveLayers(); err != nil {
|
|
|
|
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
|
|
|
return
|
|
|
|
}
|
2023-07-20 16:09:23 -07:00
|
|
|
}
|
|
|
|
|
2024-08-26 19:36:11 -07:00
|
|
|
func (s *Server) ShowHandler(c *gin.Context) {
|
2023-09-06 11:04:17 -07:00
|
|
|
var req api.ShowRequest
|
2023-10-18 16:08:42 -07:00
|
|
|
err := c.ShouldBindJSON(&req)
|
|
|
|
switch {
|
|
|
|
case errors.Is(err, io.EOF):
|
|
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
|
|
|
|
return
|
|
|
|
case err != nil:
|
|
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
2023-09-06 11:04:17 -07:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2024-01-11 14:07:54 -08:00
|
|
|
if req.Model != "" {
|
2024-01-18 15:36:50 -08:00
|
|
|
// noop
|
2024-01-11 14:07:54 -08:00
|
|
|
} else if req.Name != "" {
|
2024-01-18 15:36:50 -08:00
|
|
|
req.Model = req.Name
|
2024-01-11 14:07:54 -08:00
|
|
|
} else {
|
2024-01-04 17:23:11 -08:00
|
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
|
2023-10-18 15:56:34 -07:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2024-01-04 17:23:11 -08:00
|
|
|
resp, err := GetModelInfo(req)
|
2023-09-06 11:04:17 -07:00
|
|
|
if err != nil {
|
2024-06-15 20:53:56 -07:00
|
|
|
switch {
|
|
|
|
case os.IsNotExist(err):
|
2024-01-18 15:36:50 -08:00
|
|
|
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
|
2024-12-23 23:38:34 +08:00
|
|
|
case err.Error() == errtypes.InvalidModelNameErrMsg:
|
2024-06-15 20:53:56 -07:00
|
|
|
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
|
|
|
default:
|
2023-09-06 11:04:17 -07:00
|
|
|
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
c.JSON(http.StatusOK, resp)
|
|
|
|
}
|
|
|
|
|
2024-01-04 17:23:11 -08:00
|
|
|
func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
|
2024-12-11 15:29:59 -08:00
|
|
|
name := model.ParseName(req.Model)
|
|
|
|
if !name.IsValid() {
|
|
|
|
return nil, errModelPathInvalid
|
|
|
|
}
|
|
|
|
name, err := getExistingName(name)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
m, err := GetModel(name.String())
|
2023-09-06 11:04:17 -07:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2023-12-11 13:56:22 -08:00
|
|
|
modelDetails := api.ModelDetails{
|
2024-06-15 20:53:56 -07:00
|
|
|
ParentModel: m.ParentModel,
|
|
|
|
Format: m.Config.ModelFormat,
|
|
|
|
Family: m.Config.ModelFamily,
|
|
|
|
Families: m.Config.ModelFamilies,
|
|
|
|
ParameterSize: m.Config.ModelType,
|
|
|
|
QuantizationLevel: m.Config.FileType,
|
2023-12-11 13:56:22 -08:00
|
|
|
}
|
|
|
|
|
2024-01-04 17:23:11 -08:00
|
|
|
if req.System != "" {
|
2024-06-15 20:53:56 -07:00
|
|
|
m.System = req.System
|
2024-01-04 17:23:11 -08:00
|
|
|
}
|
|
|
|
|
2024-06-17 10:38:55 -07:00
|
|
|
msgs := make([]api.Message, len(m.Messages))
|
|
|
|
for i, msg := range m.Messages {
|
|
|
|
msgs[i] = api.Message{Role: msg.Role, Content: msg.Content}
|
2024-01-25 12:12:36 -08:00
|
|
|
}
|
|
|
|
|
2024-12-11 15:29:59 -08:00
|
|
|
manifest, err := ParseNamedManifest(name)
|
2024-06-15 20:53:56 -07:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2023-09-06 11:04:17 -07:00
|
|
|
resp := &api.ShowResponse{
|
2024-06-15 20:53:56 -07:00
|
|
|
License: strings.Join(m.License, "\n"),
|
|
|
|
System: m.System,
|
2024-06-10 14:54:42 -07:00
|
|
|
Template: m.Template.String(),
|
2024-06-15 20:53:56 -07:00
|
|
|
Details: modelDetails,
|
|
|
|
Messages: msgs,
|
|
|
|
ModifiedAt: manifest.fi.ModTime(),
|
2023-09-06 11:04:17 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
var params []string
|
|
|
|
cs := 30
|
2024-06-15 20:53:56 -07:00
|
|
|
for k, v := range m.Options {
|
2023-09-06 11:04:17 -07:00
|
|
|
switch val := v.(type) {
|
|
|
|
case []interface{}:
|
|
|
|
for _, nv := range val {
|
2024-01-16 10:34:44 -08:00
|
|
|
params = append(params, fmt.Sprintf("%-*s %#v", cs, k, nv))
|
2023-09-06 11:04:17 -07:00
|
|
|
}
|
2024-01-16 10:34:44 -08:00
|
|
|
default:
|
|
|
|
params = append(params, fmt.Sprintf("%-*s %#v", cs, k, v))
|
2023-09-06 11:04:17 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
resp.Parameters = strings.Join(params, "\n")
|
|
|
|
|
2024-01-04 17:23:11 -08:00
|
|
|
for k, v := range req.Options {
|
|
|
|
if _, ok := req.Options[k]; ok {
|
2024-06-15 20:53:56 -07:00
|
|
|
m.Options[k] = v
|
2024-01-04 17:23:11 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-04-26 16:59:31 -07:00
|
|
|
var sb strings.Builder
|
2024-05-14 15:34:29 -07:00
|
|
|
fmt.Fprintln(&sb, "# Modelfile generated by \"ollama show\"")
|
2024-04-26 16:59:31 -07:00
|
|
|
fmt.Fprintln(&sb, "# To build a new Modelfile based on this, replace FROM with:")
|
2024-06-15 20:53:56 -07:00
|
|
|
fmt.Fprintf(&sb, "# FROM %s\n\n", m.ShortName)
|
|
|
|
fmt.Fprint(&sb, m.String())
|
2024-04-26 16:59:31 -07:00
|
|
|
resp.Modelfile = sb.String()
|
2024-01-04 17:23:11 -08:00
|
|
|
|
2025-03-13 14:24:27 -07:00
|
|
|
kvData, tensors, err := getModelData(m.ModelPath, req.Verbose)
|
2024-06-19 14:19:02 -07:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2025-03-13 14:24:27 -07:00
|
|
|
|
2024-06-19 14:19:02 -07:00
|
|
|
delete(kvData, "general.name")
|
|
|
|
delete(kvData, "tokenizer.chat_template")
|
|
|
|
resp.ModelInfo = kvData
|
|
|
|
|
2025-03-13 14:24:27 -07:00
|
|
|
tensorData := make([]api.Tensor, len(tensors.Items()))
|
|
|
|
for cnt, t := range tensors.Items() {
|
|
|
|
tensorData[cnt] = api.Tensor{Name: t.Name, Type: t.Type(), Shape: t.Shape}
|
|
|
|
}
|
|
|
|
resp.Tensors = tensorData
|
|
|
|
|
2024-06-19 14:19:02 -07:00
|
|
|
if len(m.ProjectorPaths) > 0 {
|
2025-03-13 14:24:27 -07:00
|
|
|
projectorData, _, err := getModelData(m.ProjectorPaths[0], req.Verbose)
|
2024-06-19 14:19:02 -07:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
resp.ProjectorInfo = projectorData
|
|
|
|
}
|
|
|
|
|
2023-09-06 11:04:17 -07:00
|
|
|
return resp, nil
|
|
|
|
}
|
|
|
|
|
2025-03-13 14:24:27 -07:00
|
|
|
func getModelData(digest string, verbose bool) (ggml.KV, ggml.Tensors, error) {
|
2024-06-24 21:47:52 -07:00
|
|
|
maxArraySize := 0
|
|
|
|
if verbose {
|
|
|
|
maxArraySize = -1
|
|
|
|
}
|
2025-03-13 14:24:27 -07:00
|
|
|
data, err := llm.LoadModel(digest, maxArraySize)
|
2024-06-19 14:19:02 -07:00
|
|
|
if err != nil {
|
2025-03-13 14:24:27 -07:00
|
|
|
return nil, ggml.Tensors{}, err
|
2024-06-19 14:19:02 -07:00
|
|
|
}
|
|
|
|
|
2025-03-13 14:24:27 -07:00
|
|
|
kv := data.KV()
|
2024-06-19 14:19:02 -07:00
|
|
|
|
|
|
|
if !verbose {
|
|
|
|
for k := range kv {
|
|
|
|
if t, ok := kv[k].([]any); len(t) > 5 && ok {
|
|
|
|
kv[k] = []any{}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2025-03-13 14:24:27 -07:00
|
|
|
return kv, data.Tensors(), nil
|
2024-06-19 14:19:02 -07:00
|
|
|
}
|
|
|
|
|
2024-08-26 19:36:11 -07:00
|
|
|
func (s *Server) ListHandler(c *gin.Context) {
|
2024-11-05 14:21:45 -08:00
|
|
|
ms, err := Manifests(true)
|
2023-07-18 09:09:45 -07:00
|
|
|
if err != nil {
|
|
|
|
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
|
|
|
return
|
|
|
|
}
|
2023-08-30 14:14:12 -04:00
|
|
|
|
2024-06-06 10:11:45 -07:00
|
|
|
models := []api.ListModelResponse{}
|
2024-05-06 16:34:13 -07:00
|
|
|
for n, m := range ms {
|
|
|
|
var cf ConfigV2
|
2024-08-05 17:13:52 -07:00
|
|
|
|
|
|
|
if m.Config.Digest != "" {
|
|
|
|
f, err := m.Config.Open()
|
|
|
|
if err != nil {
|
|
|
|
slog.Warn("bad manifest filepath", "name", n, "error", err)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
defer f.Close()
|
|
|
|
|
|
|
|
if err := json.NewDecoder(f).Decode(&cf); err != nil {
|
|
|
|
slog.Warn("bad manifest config", "name", n, "error", err)
|
|
|
|
continue
|
|
|
|
}
|
2023-07-18 09:09:45 -07:00
|
|
|
}
|
2023-08-30 14:14:12 -04:00
|
|
|
|
2024-05-06 16:34:13 -07:00
|
|
|
// tag should never be masked
|
2024-06-06 10:11:45 -07:00
|
|
|
models = append(models, api.ListModelResponse{
|
2024-05-06 16:34:13 -07:00
|
|
|
Model: n.DisplayShortest(),
|
|
|
|
Name: n.DisplayShortest(),
|
|
|
|
Size: m.Size(),
|
|
|
|
Digest: m.digest,
|
|
|
|
ModifiedAt: m.fi.ModTime(),
|
|
|
|
Details: api.ModelDetails{
|
|
|
|
Format: cf.ModelFormat,
|
|
|
|
Family: cf.ModelFamily,
|
|
|
|
Families: cf.ModelFamilies,
|
|
|
|
ParameterSize: cf.ModelType,
|
|
|
|
QuantizationLevel: cf.FileType,
|
|
|
|
},
|
|
|
|
})
|
2023-07-18 09:09:45 -07:00
|
|
|
}
|
|
|
|
|
2024-06-06 10:11:45 -07:00
|
|
|
slices.SortStableFunc(models, func(i, j api.ListModelResponse) int {
|
2024-04-17 14:54:14 -07:00
|
|
|
// most recently modified first
|
|
|
|
return cmp.Compare(j.ModifiedAt.Unix(), i.ModifiedAt.Unix())
|
|
|
|
})
|
|
|
|
|
2023-07-19 15:00:28 -07:00
|
|
|
c.JSON(http.StatusOK, api.ListResponse{Models: models})
|
2023-07-18 09:09:45 -07:00
|
|
|
}
|
|
|
|
|
2024-08-26 19:36:11 -07:00
|
|
|
func (s *Server) CopyHandler(c *gin.Context) {
|
2024-04-16 16:22:38 -07:00
|
|
|
var r api.CopyRequest
|
|
|
|
if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
|
2023-10-18 16:08:42 -07:00
|
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
|
|
|
|
return
|
2024-04-16 16:22:38 -07:00
|
|
|
} else if err != nil {
|
2023-10-18 16:08:42 -07:00
|
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
2023-07-24 11:27:28 -04:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2024-04-16 16:22:38 -07:00
|
|
|
src := model.ParseName(r.Source)
|
|
|
|
if !src.IsValid() {
|
2024-05-01 12:39:05 -07:00
|
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("source %q is invalid", r.Source)})
|
|
|
|
return
|
2023-10-18 15:56:34 -07:00
|
|
|
}
|
server: allow mixed-case model names on push, pull, cp, and create (#7676)
This change allows for mixed-case model names to be pushed, pulled,
copied, and created, which was previously disallowed because the Ollama
registry was backed by a Docker registry that enforced a naming
convention that disallowed mixed-case names, which is no longer the
case.
This does not break existing, intended, behaviors.
Also, make TestCase test a story of creating, updating, pulling, and
copying a model with case variations, ensuring the model's manifest is
updated correctly, and not duplicated across different files with
different case variations.
2024-11-19 15:05:57 -08:00
|
|
|
src, err := getExistingName(src)
|
|
|
|
if err != nil {
|
|
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
|
|
|
return
|
|
|
|
}
|
2023-10-18 15:56:34 -07:00
|
|
|
|
2024-04-16 16:22:38 -07:00
|
|
|
dst := model.ParseName(r.Destination)
|
|
|
|
if !dst.IsValid() {
|
2024-05-07 17:35:52 -07:00
|
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("destination %q is invalid", r.Destination)})
|
2023-07-24 11:27:28 -04:00
|
|
|
return
|
|
|
|
}
|
server: allow mixed-case model names on push, pull, cp, and create (#7676)
This change allows for mixed-case model names to be pushed, pulled,
copied, and created, which was previously disallowed because the Ollama
registry was backed by a Docker registry that enforced a naming
convention that disallowed mixed-case names, which is no longer the
case.
This does not break existing, intended, behaviors.
Also, make TestCase test a story of creating, updating, pulling, and
copying a model with case variations, ensuring the model's manifest is
updated correctly, and not duplicated across different files with
different case variations.
2024-11-19 15:05:57 -08:00
|
|
|
dst, err = getExistingName(dst)
|
|
|
|
if err != nil {
|
2024-05-13 15:27:51 -07:00
|
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2024-04-16 16:22:38 -07:00
|
|
|
if err := CopyModel(src, dst); errors.Is(err, os.ErrNotExist) {
|
|
|
|
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found", r.Source)})
|
|
|
|
} else if err != nil {
|
|
|
|
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
|
|
|
}
|
2023-07-24 11:27:28 -04:00
|
|
|
}
|
|
|
|
|
2024-03-30 09:50:05 -07:00
|
|
|
func (s *Server) HeadBlobHandler(c *gin.Context) {
|
2023-11-14 14:07:40 -08:00
|
|
|
path, err := GetBlobsPath(c.Param("digest"))
|
|
|
|
if err != nil {
|
|
|
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
if _, err := os.Stat(path); err != nil {
|
|
|
|
c.AbortWithStatusJSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("blob %q not found", c.Param("digest"))})
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2023-11-15 13:55:37 -08:00
|
|
|
c.Status(http.StatusOK)
|
2023-11-14 14:07:40 -08:00
|
|
|
}
|
|
|
|
|
2024-03-30 09:50:05 -07:00
|
|
|
// CreateBlobHandler stores the request body as a blob under the digest named
// in the URL, unless a blob (or a still-present intermediate blob mapped to
// that digest) already exists.
func (s *Server) CreateBlobHandler(c *gin.Context) {
	// If this digest maps to an intermediate blob, its presence on disk
	// short-circuits the upload; a stale mapping is evicted instead and the
	// request falls through to the normal path.
	if ib, ok := intermediateBlobs[c.Param("digest")]; ok {
		p, err := GetBlobsPath(ib)
		if err != nil {
			c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

		if _, err := os.Stat(p); errors.Is(err, os.ErrNotExist) {
			// The intermediate file vanished; drop the mapping and continue
			// as a fresh upload.
			slog.Info("evicting intermediate blob which no longer exists", "digest", ib)
			delete(intermediateBlobs, c.Param("digest"))
		} else if err != nil {
			c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		} else {
			// Intermediate blob is still on disk; nothing to upload.
			c.Status(http.StatusOK)
			return
		}
	}

	path, err := GetBlobsPath(c.Param("digest"))
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	// An already-present blob is reported OK without reading the body.
	_, err = os.Stat(path)
	switch {
	case errors.Is(err, os.ErrNotExist):
		// noop
	case err != nil:
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	default:
		c.Status(http.StatusOK)
		return
	}

	// NewLayer reads the request body; the resulting layer carries the
	// content digest used for verification below.
	layer, err := NewLayer(c.Request.Body, "")
	if err != nil {
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	// Reject uploads whose content does not match the digest in the URL.
	if layer.Digest != c.Param("digest") {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("digest mismatch, expected %q, got %q", c.Param("digest"), layer.Digest)})
		return
	}

	c.Status(http.StatusCreated)
}
|
|
|
|
|
2024-03-09 00:22:08 -08:00
|
|
|
// isLocalIP reports whether ip matches an address assigned to one of this
// machine's network interfaces.
func isLocalIP(ip netip.Addr) bool {
	ifaces, err := net.Interfaces()
	if err != nil {
		return false
	}

	want := ip.String()
	for _, nic := range ifaces {
		addrs, err := nic.Addrs()
		if err != nil {
			continue
		}

		for _, addr := range addrs {
			// Interface addresses come back in CIDR form; compare only the
			// IP portion.
			parsed, _, err := net.ParseCIDR(addr.String())
			if err != nil {
				continue
			}
			if parsed.String() == want {
				return true
			}
		}
	}

	return false
}
|
|
|
|
|
2024-03-08 22:23:47 -08:00
|
|
|
// allowedHost reports whether host plausibly refers to this machine: empty,
// "localhost", the OS hostname, or any subdomain of a local-only TLD
// (.localhost, .local, .internal). Matching is case-insensitive.
func allowedHost(host string) bool {
	host = strings.ToLower(host)

	switch host {
	case "", "localhost":
		return true
	}

	if hostname, err := os.Hostname(); err == nil && host == strings.ToLower(hostname) {
		return true
	}

	// check if the host is a local TLD
	for _, suffix := range []string{".localhost", ".local", ".internal"} {
		if strings.HasSuffix(host, suffix) {
			return true
		}
	}

	return false
}
|
2024-03-08 22:23:47 -08:00
|
|
|
|
2024-03-08 23:23:59 -08:00
|
|
|
func allowedHostsMiddleware(addr net.Addr) gin.HandlerFunc {
|
|
|
|
return func(c *gin.Context) {
|
|
|
|
if addr == nil {
|
2024-03-08 22:23:47 -08:00
|
|
|
c.Next()
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2024-03-09 00:22:08 -08:00
|
|
|
if addr, err := netip.ParseAddrPort(addr.String()); err == nil && !addr.Addr().IsLoopback() {
|
2024-03-08 22:23:47 -08:00
|
|
|
c.Next()
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
host, _, err := net.SplitHostPort(c.Request.Host)
|
|
|
|
if err != nil {
|
|
|
|
host = c.Request.Host
|
|
|
|
}
|
|
|
|
|
2024-03-08 23:23:59 -08:00
|
|
|
if addr, err := netip.ParseAddr(host); err == nil {
|
2024-03-09 00:22:08 -08:00
|
|
|
if addr.IsLoopback() || addr.IsPrivate() || addr.IsUnspecified() || isLocalIP(addr) {
|
2024-03-08 23:23:59 -08:00
|
|
|
c.Next()
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-03-08 22:23:47 -08:00
|
|
|
if allowedHost(host) {
|
2024-05-21 22:21:04 -07:00
|
|
|
if c.Request.Method == http.MethodOptions {
|
2024-05-08 13:14:00 -07:00
|
|
|
c.AbortWithStatus(http.StatusNoContent)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2024-03-08 22:23:47 -08:00
|
|
|
c.Next()
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
c.AbortWithStatus(http.StatusForbidden)
|
|
|
|
}
|
2023-12-14 16:47:40 -08:00
|
|
|
}
|
2023-10-30 11:10:18 -04:00
|
|
|
|
2025-03-02 20:55:44 -08:00
|
|
|
// GenerateRoutes builds the HTTP handler for the server: CORS and
// allowed-host middleware, all native /api routes, and the OpenAI-compatible
// /v1 routes. When rc is non-nil the gin router is wrapped in a
// registry.Local handler that takes over pull/remove, falling back to the
// gin router for everything else.
func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
	corsConfig := cors.DefaultConfig()
	corsConfig.AllowWildcard = true
	corsConfig.AllowBrowserExtensions = true
	corsConfig.AllowHeaders = []string{
		"Authorization",
		"Content-Type",
		"User-Agent",
		"Accept",
		"X-Requested-With",

		// OpenAI compatibility headers
		"x-stainless-lang",
		"x-stainless-package-version",
		"x-stainless-os",
		"x-stainless-arch",
		"x-stainless-retry-count",
		"x-stainless-runtime",
		"x-stainless-runtime-version",
		"x-stainless-async",
		"x-stainless-helper-method",
		"x-stainless-poll-helper",
		"x-stainless-custom-poll-interval",
		"x-stainless-timeout",
	}
	corsConfig.AllowOrigins = envconfig.AllowedOrigins()

	r := gin.Default()
	r.Use(
		cors.New(corsConfig),
		allowedHostsMiddleware(s.addr),
	)

	// General
	r.HEAD("/", func(c *gin.Context) { c.String(http.StatusOK, "Ollama is running") })
	r.GET("/", func(c *gin.Context) { c.String(http.StatusOK, "Ollama is running") })
	r.HEAD("/api/version", func(c *gin.Context) { c.JSON(http.StatusOK, gin.H{"version": version.Version}) })
	r.GET("/api/version", func(c *gin.Context) { c.JSON(http.StatusOK, gin.H{"version": version.Version}) })

	// Local model cache management (new implementation is at end of function)
	r.POST("/api/pull", s.PullHandler)
	r.POST("/api/push", s.PushHandler)
	r.HEAD("/api/tags", s.ListHandler)
	r.GET("/api/tags", s.ListHandler)
	r.POST("/api/show", s.ShowHandler)
	r.DELETE("/api/delete", s.DeleteHandler)

	// Create
	r.POST("/api/create", s.CreateHandler)
	r.POST("/api/blobs/:digest", s.CreateBlobHandler)
	r.HEAD("/api/blobs/:digest", s.HeadBlobHandler)
	r.POST("/api/copy", s.CopyHandler)

	// Inference
	r.GET("/api/ps", s.PsHandler)
	r.POST("/api/generate", s.GenerateHandler)
	r.POST("/api/chat", s.ChatHandler)
	r.POST("/api/embed", s.EmbedHandler)
	r.POST("/api/embeddings", s.EmbeddingsHandler)

	// Inference (OpenAI compatibility)
	r.POST("/v1/chat/completions", openai.ChatMiddleware(), s.ChatHandler)
	r.POST("/v1/completions", openai.CompletionsMiddleware(), s.GenerateHandler)
	r.POST("/v1/embeddings", openai.EmbeddingsMiddleware(), s.EmbedHandler)
	r.GET("/v1/models", openai.ListMiddleware(), s.ListHandler)
	r.GET("/v1/models/:model", openai.RetrieveMiddleware(), s.ShowHandler)

	if rc != nil {
		// wrap old with new
		rs := &registry.Local{
			Client:   rc,
			Logger:   slog.Default(), // TODO(bmizerany): Take a logger, do not use slog.Default()
			Fallback: r,

			Prune: PruneLayers,
		}
		return rs, nil
	}

	return r, nil
}
|
|
|
|
|
|
|
|
func Serve(ln net.Listener) error {
|
2024-01-31 14:59:32 -08:00
|
|
|
level := slog.LevelInfo
|
2024-07-03 16:00:54 -07:00
|
|
|
if envconfig.Debug() {
|
2024-01-31 14:59:32 -08:00
|
|
|
level = slog.LevelDebug
|
2024-01-18 10:52:01 -08:00
|
|
|
}
|
2024-01-31 14:59:32 -08:00
|
|
|
|
2024-05-24 14:57:15 -07:00
|
|
|
slog.Info("server config", "env", envconfig.Values())
|
2024-01-31 14:59:32 -08:00
|
|
|
handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
|
|
|
|
Level: level,
|
|
|
|
AddSource: true,
|
|
|
|
ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
|
|
|
|
if attr.Key == slog.SourceKey {
|
|
|
|
source := attr.Value.Any().(*slog.Source)
|
|
|
|
source.File = filepath.Base(source.File)
|
|
|
|
}
|
|
|
|
|
|
|
|
return attr
|
|
|
|
},
|
|
|
|
})
|
|
|
|
|
|
|
|
slog.SetDefault(slog.New(handler))
|
|
|
|
|
2024-03-14 20:18:06 -07:00
|
|
|
blobsDir, err := GetBlobsPath("")
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := fixBlobs(blobsDir); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2024-07-03 17:22:13 -07:00
|
|
|
if !envconfig.NoPrune() {
|
2024-11-05 14:21:45 -08:00
|
|
|
if _, err := Manifests(false); err != nil {
|
|
|
|
slog.Warn("corrupt manifests detected, skipping prune operation. Re-pull or delete to clear", "error", err)
|
|
|
|
} else {
|
|
|
|
// clean up unused layers and manifests
|
|
|
|
if err := PruneLayers(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2023-12-14 16:47:40 -08:00
|
|
|
|
2024-11-05 14:21:45 -08:00
|
|
|
manifestsPath, err := GetManifestPath()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2023-12-14 16:47:40 -08:00
|
|
|
|
2024-11-05 14:21:45 -08:00
|
|
|
if err := PruneDirectory(manifestsPath); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2023-12-14 16:47:40 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2025-02-27 12:04:53 -08:00
|
|
|
s := &Server{addr: ln.Addr()}
|
|
|
|
|
server/internal/registry: take over pulls from server package (#9485)
This commit replaces the old pull implementation in the server package
with the new, faster, more robust pull implementation in the registry
package.
The new endpoint, and now the remove endpoint too, are behind the
feature gate "client2" enabled only by setting the OLLAMA_EXPERIMENT
environment variable include "client2".
Currently, the progress indication is wired to perform the same as the
previous implementation to avoid making changes to the CLI, and because
the status reports happen at the start of the download, and the end of
the write to disk, the progress indication is not as smooth as it could
be. This is a known issue and will be addressed in a future change.
This implementation may be ~0.5-1.0% slower in rare cases, depending on
network and disk speed, but is generally MUCH faster and more robust
than the its predecessor in all other cases.
2025-03-05 14:48:18 -08:00
|
|
|
var rc *ollama.Registry
|
|
|
|
if useClient2 {
|
|
|
|
var err error
|
|
|
|
rc, err = ollama.DefaultRegistry()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2025-02-27 12:04:53 -08:00
|
|
|
}
|
|
|
|
|
2025-03-02 20:55:44 -08:00
|
|
|
h, err := s.GenerateRoutes(rc)
|
2025-02-27 12:04:53 -08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
server/internal/registry: take over pulls from server package (#9485)
This commit replaces the old pull implementation in the server package
with the new, faster, more robust pull implementation in the registry
package.
The new endpoint, and now the remove endpoint too, are behind the
feature gate "client2" enabled only by setting the OLLAMA_EXPERIMENT
environment variable include "client2".
Currently, the progress indication is wired to perform the same as the
previous implementation to avoid making changes to the CLI, and because
the status reports happen at the start of the download, and the end of
the write to disk, the progress indication is not as smooth as it could
be. This is a known issue and will be addressed in a future change.
This implementation may be ~0.5-1.0% slower in rare cases, depending on
network and disk speed, but is generally MUCH faster and more robust
than the its predecessor in all other cases.
2025-03-05 14:48:18 -08:00
|
|
|
|
2025-02-27 12:04:53 -08:00
|
|
|
http.Handle("/", h)
|
|
|
|
|
2024-03-30 09:50:05 -07:00
|
|
|
ctx, done := context.WithCancel(context.Background())
|
2024-05-09 15:47:02 -07:00
|
|
|
schedCtx, schedDone := context.WithCancel(ctx)
|
|
|
|
sched := InitScheduler(schedCtx)
|
2025-02-27 12:04:53 -08:00
|
|
|
s.sched = sched
|
2023-12-14 16:47:40 -08:00
|
|
|
|
2024-01-18 10:52:01 -08:00
|
|
|
slog.Info(fmt.Sprintf("Listening on %s (version %s)", ln.Addr(), version.Version))
|
2023-12-14 16:47:40 -08:00
|
|
|
srvr := &http.Server{
|
2024-06-24 21:47:52 -07:00
|
|
|
// Use http.DefaultServeMux so we get net/http/pprof for
|
|
|
|
// free.
|
|
|
|
//
|
|
|
|
// TODO(bmizerany): Decide if we want to make this
|
|
|
|
// configurable so it is not exposed by default, or allow
|
|
|
|
// users to bind it to a different port. This was a quick
|
|
|
|
// and easy way to get pprof, but it may not be the best
|
|
|
|
// way.
|
|
|
|
Handler: nil,
|
2023-07-03 15:22:44 -04:00
|
|
|
}
|
|
|
|
|
2023-08-30 16:35:03 -04:00
|
|
|
// listen for a ctrl+c and stop any loaded llm
|
|
|
|
signals := make(chan os.Signal, 1)
|
2023-09-21 20:38:49 +01:00
|
|
|
signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
|
2023-08-30 16:35:03 -04:00
|
|
|
go func() {
|
|
|
|
<-signals
|
2024-05-06 16:01:37 -07:00
|
|
|
srvr.Close()
|
2024-05-09 15:47:02 -07:00
|
|
|
schedDone()
|
2024-03-30 09:50:05 -07:00
|
|
|
sched.unloadAllRunners()
|
2024-05-09 15:47:02 -07:00
|
|
|
done()
|
2023-08-30 16:35:03 -04:00
|
|
|
}()
|
|
|
|
|
2024-05-09 15:47:02 -07:00
|
|
|
s.sched.Run(schedCtx)
|
2024-03-30 09:50:05 -07:00
|
|
|
|
|
|
|
// At startup we retrieve GPU information so we can get log messages before loading a model
|
|
|
|
// This will log warnings to the log in case we have problems with detected GPUs
|
2024-10-16 17:45:00 -07:00
|
|
|
gpus := discover.GetGPUInfo()
|
2024-05-07 14:54:26 -07:00
|
|
|
gpus.LogDetails()
|
2023-09-12 11:04:35 -04:00
|
|
|
|
2024-05-09 15:47:02 -07:00
|
|
|
err = srvr.Serve(ln)
|
|
|
|
// If server is closed from the signal handler, wait for the ctx to be done
|
|
|
|
// otherwise error out quickly
|
|
|
|
if !errors.Is(err, http.ErrServerClosed) {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
<-ctx.Done()
|
2024-05-16 16:25:38 -07:00
|
|
|
return nil
|
2023-07-03 15:22:44 -04:00
|
|
|
}
|
2023-07-06 10:40:11 -07:00
|
|
|
|
2023-10-11 12:54:27 -04:00
|
|
|
func waitForStream(c *gin.Context, ch chan interface{}) {
|
|
|
|
c.Header("Content-Type", "application/json")
|
|
|
|
for resp := range ch {
|
|
|
|
switch r := resp.(type) {
|
|
|
|
case api.ProgressResponse:
|
|
|
|
if r.Status == "success" {
|
|
|
|
c.JSON(http.StatusOK, r)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
case gin.H:
|
2024-07-19 15:24:29 -07:00
|
|
|
status, ok := r["status"].(int)
|
|
|
|
if !ok {
|
|
|
|
status = http.StatusInternalServerError
|
|
|
|
}
|
2023-10-11 12:54:27 -04:00
|
|
|
if errorMsg, ok := r["error"].(string); ok {
|
2024-07-19 15:24:29 -07:00
|
|
|
c.JSON(status, gin.H{"error": errorMsg})
|
2023-10-11 12:54:27 -04:00
|
|
|
return
|
|
|
|
} else {
|
2024-07-19 15:24:29 -07:00
|
|
|
c.JSON(status, gin.H{"error": "unexpected error format in progress response"})
|
2023-10-11 12:54:27 -04:00
|
|
|
return
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected progress response"})
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected end of progress response"})
|
|
|
|
}
|
|
|
|
|
2023-07-14 14:15:53 -07:00
|
|
|
func streamResponse(c *gin.Context, ch chan any) {
|
2023-08-08 21:38:10 -07:00
|
|
|
c.Header("Content-Type", "application/x-ndjson")
|
2023-07-11 11:54:22 -07:00
|
|
|
c.Stream(func(w io.Writer) bool {
|
|
|
|
val, ok := <-ch
|
|
|
|
if !ok {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
bts, err := json.Marshal(val)
|
|
|
|
if err != nil {
|
2024-01-18 10:52:01 -08:00
|
|
|
slog.Info(fmt.Sprintf("streamResponse: json.Marshal failed with %s", err))
|
2023-07-11 11:54:22 -07:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2023-09-30 00:45:52 -04:00
|
|
|
// Delineate chunks with new-line delimiter
|
2023-07-11 11:54:22 -07:00
|
|
|
bts = append(bts, '\n')
|
|
|
|
if _, err := w.Write(bts); err != nil {
|
2024-01-18 10:52:01 -08:00
|
|
|
slog.Info(fmt.Sprintf("streamResponse: w.Write failed with %s", err))
|
2023-07-11 11:54:22 -07:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
return true
|
|
|
|
})
|
|
|
|
}
|
2023-12-05 14:57:33 -05:00
|
|
|
|
2024-08-26 19:36:11 -07:00
|
|
|
func (s *Server) PsHandler(c *gin.Context) {
|
2024-06-06 10:11:45 -07:00
|
|
|
models := []api.ProcessModelResponse{}
|
2024-05-13 17:17:36 -07:00
|
|
|
|
|
|
|
for _, v := range s.sched.loaded {
|
|
|
|
model := v.model
|
|
|
|
modelDetails := api.ModelDetails{
|
|
|
|
Format: model.Config.ModelFormat,
|
|
|
|
Family: model.Config.ModelFamily,
|
|
|
|
Families: model.Config.ModelFamilies,
|
|
|
|
ParameterSize: model.Config.ModelType,
|
|
|
|
QuantizationLevel: model.Config.FileType,
|
|
|
|
}
|
|
|
|
|
2024-06-06 10:11:45 -07:00
|
|
|
mr := api.ProcessModelResponse{
|
2024-05-13 17:17:36 -07:00
|
|
|
Model: model.ShortName,
|
|
|
|
Name: model.ShortName,
|
|
|
|
Size: int64(v.estimatedTotal),
|
|
|
|
SizeVRAM: int64(v.estimatedVRAM),
|
|
|
|
Digest: model.Digest,
|
|
|
|
Details: modelDetails,
|
|
|
|
ExpiresAt: v.expiresAt,
|
|
|
|
}
|
2024-05-15 15:43:16 -07:00
|
|
|
// The scheduler waits to set expiresAt, so if a model is loading it's
|
|
|
|
// possible that it will be set to the unix epoch. For those cases, just
|
|
|
|
// calculate the time w/ the sessionDuration instead.
|
|
|
|
var epoch time.Time
|
|
|
|
if v.expiresAt == epoch {
|
|
|
|
mr.ExpiresAt = time.Now().Add(v.sessionDuration)
|
|
|
|
}
|
|
|
|
|
2024-05-13 17:17:36 -07:00
|
|
|
models = append(models, mr)
|
|
|
|
}
|
|
|
|
|
2024-06-21 15:59:41 -07:00
|
|
|
slices.SortStableFunc(models, func(i, j api.ProcessModelResponse) int {
|
|
|
|
// longest duration remaining listed first
|
|
|
|
return cmp.Compare(j.ExpiresAt.Unix(), i.ExpiresAt.Unix())
|
|
|
|
})
|
|
|
|
|
2024-06-06 10:11:45 -07:00
|
|
|
c.JSON(http.StatusOK, api.ProcessResponse{Models: models})
|
2024-05-13 17:17:36 -07:00
|
|
|
}
|
|
|
|
|
2024-03-30 09:50:05 -07:00
|
|
|
// ChatHandler services POST /api/chat. It binds the request, handles the two
// special empty-message cases (explicit unload via keepalive=0, and load-only
// warmup), schedules a runner for the model, renders the chat prompt, and
// then streams (or aggregates, when stream=false) completion chunks back to
// the client, including tool-call extraction when tools were supplied.
func (s *Server) ChatHandler(c *gin.Context) {
	// Start of request; used for TotalDuration/LoadDuration metrics below.
	checkpointStart := time.Now()

	var req api.ChatRequest
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	} else if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	// expire the runner
	// No messages plus an explicit keepalive of 0 is the client's way of
	// asking to unload the model immediately.
	if len(req.Messages) == 0 && req.KeepAlive != nil && int(req.KeepAlive.Seconds()) == 0 {
		model, err := GetModel(req.Model)
		if err != nil {
			switch {
			case os.IsNotExist(err):
				c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
			case err.Error() == errtypes.InvalidModelNameErrMsg:
				c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
			default:
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			}
			return
		}
		s.sched.expireRunner(model)

		// Acknowledge the unload with an empty assistant message.
		c.JSON(http.StatusOK, api.ChatResponse{
			Model:      req.Model,
			CreatedAt:  time.Now().UTC(),
			Message:    api.Message{Role: "assistant"},
			Done:       true,
			DoneReason: "unload",
		})
		return
	}

	// Required model capabilities; tool support is only demanded when the
	// request actually carries tools.
	caps := []Capability{CapabilityCompletion}
	if len(req.Tools) > 0 {
		caps = append(caps, CapabilityTools)
	}

	name := model.ParseName(req.Model)
	if !name.IsValid() {
		c.JSON(http.StatusBadRequest, gin.H{"error": "model is required"})
		return
	}
	name, err := getExistingName(name)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "model is required"})
		return
	}

	// Blocks until a runner for the model is available (or scheduling fails).
	r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive)
	if errors.Is(err, errCapabilityCompletion) {
		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support chat", req.Model)})
		return
	} else if err != nil {
		handleScheduleError(c, req.Model, err)
		return
	}

	// Model is loaded from here on; marks the end of LoadDuration.
	checkpointLoaded := time.Now()

	// No messages (and keepalive did not request an unload above): this is a
	// load-only warmup request.
	if len(req.Messages) == 0 {
		c.JSON(http.StatusOK, api.ChatResponse{
			Model:      req.Model,
			CreatedAt:  time.Now().UTC(),
			Message:    api.Message{Role: "assistant"},
			Done:       true,
			DoneReason: "load",
		})
		return
	}

	// Prepend the model's stored messages, then its system prompt unless the
	// client already supplied a leading system message. (req.Messages is
	// non-empty here, so indexing [0] is safe.)
	msgs := append(m.Messages, req.Messages...)
	if req.Messages[0].Role != "system" && m.System != "" {
		msgs = append([]api.Message{{Role: "system", Content: m.System}}, msgs...)
	}

	prompt, images, err := chatPrompt(c.Request.Context(), m, r.Tokenize, opts, msgs, req.Tools)
	if err != nil {
		slog.Error("chat prompt error", "error", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	slog.Debug("chat request", "images", len(images), "prompt", prompt)

	// Producer goroutine: runs the completion and forwards chunks on ch.
	// It owns ch and closes it when the completion finishes or errors.
	ch := make(chan any)
	go func() {
		defer close(ch)
		// sb accumulates streamed content while probing for tool calls;
		// toolCallIndex numbers tool calls across chunks.
		var sb strings.Builder
		var toolCallIndex int = 0
		if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
			Prompt:  prompt,
			Images:  images,
			Format:  req.Format,
			Options: opts,
		}, func(r llm.CompletionResponse) {
			res := api.ChatResponse{
				Model:      req.Model,
				CreatedAt:  time.Now().UTC(),
				Message:    api.Message{Role: "assistant", Content: r.Content},
				Done:       r.Done,
				DoneReason: r.DoneReason,
				Metrics: api.Metrics{
					PromptEvalCount:    r.PromptEvalCount,
					PromptEvalDuration: r.PromptEvalDuration,
					EvalCount:          r.EvalCount,
					EvalDuration:       r.EvalDuration,
				},
			}

			if r.Done {
				res.TotalDuration = time.Since(checkpointStart)
				res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
			}

			// TODO: tool call checking and filtering should be moved outside of this callback once streaming
			// however this was a simple change for now without reworking streaming logic of this (and other)
			// handlers
			// Non-streaming requests, or requests without tools, bypass the
			// tool-call detection below and forward the chunk unchanged.
			if req.Stream != nil && !*req.Stream || len(req.Tools) == 0 {
				ch <- res
				return
			}

			// Streaming tool calls:
			// If tools are recognized, use a flag to track the sending of a tool downstream
			// This ensures that content is cleared from the message on the last chunk sent
			sb.WriteString(r.Content)
			if toolCalls, ok := m.parseToolCalls(sb.String()); ok {
				res.Message.ToolCalls = toolCalls
				for i := range toolCalls {
					toolCalls[i].Function.Index = toolCallIndex
					toolCallIndex++
				}
				res.Message.Content = ""
				sb.Reset()
				ch <- res
				return
			}

			if r.Done {
				// Send any remaining content if no tool calls were detected
				if toolCallIndex == 0 {
					res.Message.Content = sb.String()
				}
				ch <- res
			}
		}); err != nil {
			ch <- gin.H{"error": err.Error()}
		}
	}()

	// Non-streaming path: aggregate all chunks into one response.
	if req.Stream != nil && !*req.Stream {
		var resp api.ChatResponse
		var sb strings.Builder
		for rr := range ch {
			switch t := rr.(type) {
			case api.ChatResponse:
				// Concatenate content; keep the latest chunk for metrics
				// and done state.
				sb.WriteString(t.Message.Content)
				resp = t
			case gin.H:
				msg, ok := t["error"].(string)
				if !ok {
					msg = "unexpected error format in response"
				}

				c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
				return
			default:
				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response"})
				return
			}
		}

		resp.Message.Content = sb.String()

		// Re-run tool-call parsing over the full aggregated content.
		if len(req.Tools) > 0 {
			if toolCalls, ok := m.parseToolCalls(sb.String()); ok {
				resp.Message.ToolCalls = toolCalls
				resp.Message.Content = ""
			}
		}

		c.JSON(http.StatusOK, resp)
		return
	}

	// Streaming path: relay chunks as NDJSON until ch closes.
	streamResponse(c, ch)
}
|
2024-05-03 16:25:57 -07:00
|
|
|
|
2024-06-20 11:00:08 -07:00
|
|
|
func handleScheduleError(c *gin.Context, name string, err error) {
|
2024-06-17 10:38:55 -07:00
|
|
|
switch {
|
2024-06-20 19:13:36 -07:00
|
|
|
case errors.Is(err, errCapabilities), errors.Is(err, errRequired):
|
2024-06-20 11:00:08 -07:00
|
|
|
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
2024-06-17 10:38:55 -07:00
|
|
|
case errors.Is(err, context.Canceled):
|
2024-05-03 16:25:57 -07:00
|
|
|
c.JSON(499, gin.H{"error": "request canceled"})
|
2024-06-17 10:38:55 -07:00
|
|
|
case errors.Is(err, ErrMaxQueue):
|
2024-05-03 16:25:57 -07:00
|
|
|
c.JSON(http.StatusServiceUnavailable, gin.H{"error": err.Error()})
|
2024-06-20 11:00:08 -07:00
|
|
|
case errors.Is(err, os.ErrNotExist):
|
|
|
|
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found, try pulling it first", name)})
|
2024-06-17 10:38:55 -07:00
|
|
|
default:
|
|
|
|
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
2024-05-03 16:25:57 -07:00
|
|
|
}
|
|
|
|
}
|