diff --git a/fs/config.go b/fs/config.go
new file mode 100644
index 000000000..bc5bfa550
--- /dev/null
+++ b/fs/config.go
@@ -0,0 +1,28 @@
+package fs
+
+// Config is a typed accessor for model configuration values looked up by
+// string key. It is the type returned by ml.Backend.Config and passed to the
+// model constructors registered with model.Register.
+//
+// NOTE(review): every accessor takes a key plus an optional variadic
+// argument; that argument presumably supplies a fallback for a missing key,
+// but confirm against the fs/ggml KV implementation.
+type Config interface {
+	// Architecture returns the model architecture name.
+	Architecture() string
+	// String returns the string value stored under the given key.
+	String(string, ...string) string
+	// Uint returns the uint32 value stored under the given key.
+	Uint(string, ...uint32) uint32
+	// Float returns the float32 value stored under the given key.
+	Float(string, ...float32) float32
+	// Bool returns the bool value stored under the given key.
+	Bool(string, ...bool) bool
+
+	// Strings returns the string slice stored under the given key.
+	Strings(string, ...[]string) []string
+	// Uints returns the uint32 slice stored under the given key.
+	Uints(string, ...[]uint32) []uint32
+	// Floats returns the float32 slice stored under the given key.
+	Floats(string, ...[]float32) []float32
+}
diff --git a/kvcache/causal_test.go b/kvcache/causal_test.go
index bf98abef6..517e3726d 100644
--- a/kvcache/causal_test.go
+++ b/kvcache/causal_test.go
@@ -5,6 +5,7 @@ import (
 	"slices"
 	"testing"
 
+	"github.com/ollama/ollama/fs"
 	"github.com/ollama/ollama/ml"
 	"github.com/ollama/ollama/model/input"
 )
@@ -373,7 +374,7 @@ func TestCanResume(t *testing.T) {
 
 type testBackend struct{}
 
-func (b *testBackend) Config() ml.Config {
+func (b *testBackend) Config() fs.Config {
 	panic("not implemented")
 }
 
diff --git a/ml/backend.go b/ml/backend.go
index cfb18d6a9..b22ba7952 100644
--- a/ml/backend.go
+++ b/ml/backend.go
@@ -9,22 +9,12 @@ import (
 	"slices"
 	"strconv"
 	"strings"
+
+	"github.com/ollama/ollama/fs"
 )
 
-type Config interface {
-	Architecture() string
-	String(string, ...string) string
-	Uint(string, ...uint32) uint32
-	Float(string, ...float32) float32
-	Bool(string, ...bool) bool
-
-	Strings(string, ...[]string) []string
-	Uints(string, ...[]uint32) []uint32
-	Floats(string, ...[]float32) []float32
-}
-
 type Backend interface {
-	Config() Config
+	Config() fs.Config
 	Get(name string) Tensor
 	NewContext() Context
 	NewContextSize(size int) Context
diff --git a/ml/backend/ggml/ggml.go b/ml/backend/ggml/ggml.go
index b6f59ae0e..17f063840 100644
--- a/ml/backend/ggml/ggml.go
+++ b/ml/backend/ggml/ggml.go
@@ -24,7 +24,8 @@ import (
 	"unsafe"
 
 	"github.com/ollama/ollama/format"
-	fs "github.com/ollama/ollama/fs/ggml"
+	"github.com/ollama/ollama/fs"
+	fsggml "github.com/ollama/ollama/fs/ggml"
 	"github.com/ollama/ollama/ml"
 	ggml "github.com/ollama/ollama/ml/backend/ggml/ggml/src"
 	"golang.org/x/sync/errgroup"
@@ -41,7 +42,7 @@ func devices() []*C.struct_ggml_backend_device {
 }
 
 type Backend struct {
-	meta *fs.GGML
+	meta *fsggml.GGML
 	sched *C.struct_ggml_backend_sched
 
 	tensors map[string]*C.struct_ggml_tensor
@@ -58,7 +59,7 @@ type Backend struct {
 }
 
 func New(ctx context.Context, r *os.File, params ml.BackendParams) (ml.Backend, error) {
-	meta, n, err := fs.Decode(r, -1)
+	meta, n, err := fsggml.Decode(r, -1)
 	if err != nil {
 		return nil, err
 	}
@@ -182,7 +183,7 @@ func New(ctx context.Context, r *os.File, params ml.BackendParams) (ml.Backend,
 	maxTensors += blocks * 2
 
 	type tensor struct {
-		source *fs.Tensor
+		source *fsggml.Tensor
 		target string
 	}
 
@@ -413,7 +414,7 @@ func init() {
 	ml.RegisterBackend("ggml", New)
 }
 
-func (b *Backend) Config() ml.Config {
+func (b *Backend) Config() fs.Config {
 	return b.meta.KV()
 }
 
diff --git a/model/model.go b/model/model.go
index 8355a55a8..bc8944d22 100644
--- a/model/model.go
+++ b/model/model.go
@@ -16,7 +16,8 @@ import (
 	_ "golang.org/x/image/tiff"
 	_ "golang.org/x/image/webp"
 
-	fs "github.com/ollama/ollama/fs/ggml"
+	"github.com/ollama/ollama/fs"
+	fsggml "github.com/ollama/ollama/fs/ggml"
 	"github.com/ollama/ollama/kvcache"
 	"github.com/ollama/ollama/ml"
 	_ "github.com/ollama/ollama/ml/backend"
@@ -83,10 +84,10 @@ func (m *Base) Config() config {
 	return m.config
 }
 
-var models = make(map[string]func(ml.Config) (Model, error))
+var models = make(map[string]func(fs.Config) (Model, error))
 
 // Register registers a model constructor for the given architecture
-func Register(name string, f func(ml.Config) (Model, error)) {
+func Register(name string, f func(fs.Config) (Model, error)) {
 	if _, ok := models[name]; ok {
 		panic("model: model already registered")
 	}
@@ -131,14 +132,14 @@ func NewTextProcessor(s string) (TextProcessor, error) {
 		return nil, err
 	}
 	defer r.Close()
-	meta, _, err := fs.Decode(r, -1)
+	meta, _, err := fsggml.Decode(r, -1)
 	if err != nil {
 		return nil, err
 	}
 	return getTextProcessor(meta.KV())
 }
 
-func getTextProcessor(kv fs.KV) (TextProcessor, error) {
+func getTextProcessor(kv fsggml.KV) (TextProcessor, error) {
 	arch := kv.Architecture()
 	f, ok := models[arch]
 	if !ok {
diff --git a/model/model_test.go b/model/model_test.go
index 0b1ea08e8..717c425e2 100644
--- a/model/model_test.go
+++ b/model/model_test.go
@@ -7,7 +7,8 @@ import (
 	"testing"
 
 	"github.com/google/go-cmp/cmp"
-	fs "github.com/ollama/ollama/fs/ggml"
+	"github.com/ollama/ollama/fs"
+	fsggml "github.com/ollama/ollama/fs/ggml"
 	"github.com/ollama/ollama/ml"
 	"github.com/ollama/ollama/ml/backend/ggml"
 	"github.com/ollama/ollama/ml/nn"
@@ -139,7 +140,7 @@ func TestPopulateFieldsAlternateName(t *testing.T) {
 }
 
 func TestGetTextProcessor(t *testing.T) {
-	tp, err := getTextProcessor(fs.KV{})
+	tp, err := getTextProcessor(fsggml.KV{})
 	if err == nil {
 		t.Error("expected error")
 	} else if !strings.Contains(err.Error(), "unsupported model architecture") {
@@ -148,10 +149,10 @@ func TestGetTextProcessor(t *testing.T) {
 		t.Error("expected nil tp")
 	}
 
-	models["dummy"] = func(ml.Config) (Model, error) {
+	models["dummy"] = func(fs.Config) (Model, error) {
 		return notTextProcessorModel{}, nil
 	}
-	tp, err = getTextProcessor(fs.KV{"general.architecture": "dummy"})
+	tp, err = getTextProcessor(fsggml.KV{"general.architecture": "dummy"})
 	if err == nil {
 		t.Error("expected error")
 	} else if !strings.Contains(err.Error(), "not a TextProcessor") {
diff --git a/model/models/gemma2/model.go b/model/models/gemma2/model.go
index b8f5f0666..752cb5cc2 100644
--- a/model/models/gemma2/model.go
+++ b/model/models/gemma2/model.go
@@ -3,6 +3,7 @@ package gemma2
 import (
 	"math"
 
+	"github.com/ollama/ollama/fs"
 	"github.com/ollama/ollama/kvcache"
 	"github.com/ollama/ollama/ml"
 	"github.com/ollama/ollama/ml/nn"
@@ -35,7 +36,7 @@ const (
 	gemma27BLayerCount = 46
 )
 
-func New(c ml.Config) (model.Model, error) {
+func New(c fs.Config) (model.Model, error) {
 	m := Model{
 		SentencePieceModel: model.NewSentencePieceModel(
 			&model.Vocabulary{
diff --git a/model/models/gemma3/model.go b/model/models/gemma3/model.go
index f9c53343a..cef058e2a 100644
--- a/model/models/gemma3/model.go
+++ b/model/models/gemma3/model.go
@@ -6,6 +6,7 @@ import (
 	"math"
 	"slices"
 
+	"github.com/ollama/ollama/fs"
 	"github.com/ollama/ollama/kvcache"
 	"github.com/ollama/ollama/ml"
 	"github.com/ollama/ollama/ml/nn"
@@ -52,7 +53,7 @@ func (p *MultiModalProjector) Forward(ctx ml.Context, visionOutputs ml.Tensor, i
 	return visionOutputs
 }
 
-func New(c ml.Config) (model.Model, error) {
+func New(c fs.Config) (model.Model, error) {
 	m := Model{
 		SentencePieceModel: model.NewSentencePieceModel(
 			&model.Vocabulary{
diff --git a/model/models/gemma3/model_text.go b/model/models/gemma3/model_text.go
index 7b2b83c02..3b640a968 100644
--- a/model/models/gemma3/model_text.go
+++ b/model/models/gemma3/model_text.go
@@ -3,6 +3,7 @@ package gemma3
 import (
 	"math"
 
+	"github.com/ollama/ollama/fs"
 	"github.com/ollama/ollama/kvcache"
 	"github.com/ollama/ollama/ml"
 	"github.com/ollama/ollama/ml/nn"
@@ -40,7 +41,7 @@ const (
 	cacheTypeCausal
 )
 
-func newTextModel(c ml.Config) *TextModel {
+func newTextModel(c fs.Config) *TextModel {
 	numBlocks := int(c.Uint("block_count"))
 
 	m := TextModel{
diff --git a/model/models/gemma3/model_vision.go b/model/models/gemma3/model_vision.go
index 94aa27bd7..636a363df 100644
--- a/model/models/gemma3/model_vision.go
+++ b/model/models/gemma3/model_vision.go
@@ -3,6 +3,7 @@ package gemma3
 import (
 	"math"
 
+	"github.com/ollama/ollama/fs"
 	"github.com/ollama/ollama/ml"
 	"github.com/ollama/ollama/ml/nn"
 )
@@ -111,7 +112,7 @@ func (m *VisionModel) Forward(ctx ml.Context, pixelValues ml.Tensor) ml.Tensor {
 	return hiddenState
 }
 
-func newVisionModel(c ml.Config) *VisionModel {
+func newVisionModel(c fs.Config) *VisionModel {
 	return &VisionModel{
 		Layers: make([]VisionEncoderLayer, c.Uint("vision.block_count")),
 		VisionModelOptions: &VisionModelOptions{
diff --git a/model/models/gemma3/process_image.go b/model/models/gemma3/process_image.go
index fe8269a3b..611a17bd7 100644
--- a/model/models/gemma3/process_image.go
+++ b/model/models/gemma3/process_image.go
@@ -3,7 +3,7 @@ package gemma3
 import (
 	"image"
 
-	"github.com/ollama/ollama/ml"
+	"github.com/ollama/ollama/fs"
 	"github.com/ollama/ollama/model/imageproc"
 )
 
@@ -11,7 +11,7 @@ type ImageProcessor struct {
 	imageSize, patchSize, numChannels int
 }
 
-func newImageProcessor(c ml.Config) ImageProcessor {
+func newImageProcessor(c fs.Config) ImageProcessor {
 	return ImageProcessor{
 		imageSize: int(c.Uint("vision.image_size")),
 		patchSize: int(c.Uint("vision.patch_size")),
diff --git a/model/models/llama/model.go b/model/models/llama/model.go
index 5c173997b..68980dd76 100644
--- a/model/models/llama/model.go
+++ b/model/models/llama/model.go
@@ -5,6 +5,7 @@ import (
 	"math"
 	"strings"
 
+	"github.com/ollama/ollama/fs"
 	"github.com/ollama/ollama/kvcache"
 	"github.com/ollama/ollama/ml"
 	"github.com/ollama/ollama/ml/nn"
@@ -30,7 +31,7 @@ type Model struct {
 	*Options
 }
 
-func New(c ml.Config) (model.Model, error) {
+func New(c fs.Config) (model.Model, error) {
 	if !strings.EqualFold(c.String("tokenizer.ggml.model"), "gpt2") {
 		return nil, fmt.Errorf("tokenizer %s not yet supported", c.String("tokenizer.ggml.model"))
 	}
diff --git a/model/models/mllama/model.go b/model/models/mllama/model.go
index 988a189d4..e53eb184c 100644
--- a/model/models/mllama/model.go
+++ b/model/models/mllama/model.go
@@ -8,6 +8,7 @@ import (
 	"image"
 	"slices"
 
+	"github.com/ollama/ollama/fs"
 	"github.com/ollama/ollama/kvcache"
 	"github.com/ollama/ollama/ml"
 	"github.com/ollama/ollama/ml/nn"
@@ -32,7 +33,7 @@ const (
 	selfAttentionLayer
 )
 
-func New(c ml.Config) (model.Model, error) {
+func New(c fs.Config) (model.Model, error) {
 	// Verify unified config
 	if c.Uint("vision.block_count") == 0 {
 		return nil, fmt.Errorf("non-unified vision model not supported")
diff --git a/model/models/mllama/model_text.go b/model/models/mllama/model_text.go
index 1cf30d89b..261897c33 100644
--- a/model/models/mllama/model_text.go
+++ b/model/models/mllama/model_text.go
@@ -4,6 +4,7 @@ import (
 	"math"
 	"slices"
 
+	"github.com/ollama/ollama/fs"
 	"github.com/ollama/ollama/kvcache"
 	"github.com/ollama/ollama/ml"
 	"github.com/ollama/ollama/ml/nn"
@@ -220,7 +221,7 @@ func (m *TextModel) Forward(ctx ml.Context, inputIDs, positionIDs, outputs, mask
 	return m.Output.Forward(ctx, hiddenState)
 }
 
-func newTextModel(c ml.Config) *TextModel {
+func newTextModel(c fs.Config) *TextModel {
 	var decoderLayers []TextDecoderLayer
 	for i := range c.Uint("block_count") {
 		var textDecoderLayer TextDecoderLayer
diff --git a/model/models/mllama/model_vision.go b/model/models/mllama/model_vision.go
index ac777f051..2f7d26ca2 100644
--- a/model/models/mllama/model_vision.go
+++ b/model/models/mllama/model_vision.go
@@ -4,6 +4,7 @@ import (
 	"math"
 	"slices"
 
+	"github.com/ollama/ollama/fs"
 	"github.com/ollama/ollama/ml"
 	"github.com/ollama/ollama/ml/nn"
 )
@@ -213,7 +214,7 @@ func (m *VisionModel) Forward(ctx ml.Context, pixelValues, positionIDs, aspectRa
 	return hiddenState.Concat(ctx, hiddenStates, 0)
 }
 
-func newVisionModel(c ml.Config) *VisionModel {
+func newVisionModel(c fs.Config) *VisionModel {
 	return &VisionModel{
 		Transformer: &VisionEncoder{Layers: make([]VisionEncoderLayer, c.Uint("vision.block_count"))},
 		GlobalTransformer: &VisionEncoder{Layers: make([]VisionEncoderLayer, c.Uint("vision.global.block_count"))},
diff --git a/model/models/mllama/process_image.go b/model/models/mllama/process_image.go
index c94d14a64..1b0506d32 100644
--- a/model/models/mllama/process_image.go
+++ b/model/models/mllama/process_image.go
@@ -8,14 +8,14 @@ import (
 
 	"golang.org/x/image/draw"
 
-	"github.com/ollama/ollama/ml"
+	"github.com/ollama/ollama/fs"
 )
 
 type ImageProcessor struct {
 	imageSize, numChannels, maxNumTiles int
 }
 
-func newImageProcessor(c ml.Config) ImageProcessor {
+func newImageProcessor(c fs.Config) ImageProcessor {
 	return ImageProcessor{
 		imageSize: int(c.Uint("vision.image_size")),
 		numChannels: int(c.Uint("vision.num_channels")),