Patch summary (commentary only; everything from the first "diff --git" line on is the original patch):

  * model/model.go: adds the exported sentinel error ErrNoVisionModel
    ("vision model not found").
  * model/models/gemma3/model.go: EncodeMultimodal returns
    (nil, model.ErrNoVisionModel) when m.VisionModel.Layers is empty,
    before the image bytes are decoded.
  * model/models/mllama/model.go: EncodeMultimodal returns
    (nil, model.ErrNoVisionModel) when either
    m.VisionModel.Transformer.Layers or m.GlobalTransformer.Layers is
    empty, before the image bytes are decoded.

NOTE(review): the model/model.go hunk uses errors.New but no hunk touches
the import block; presumably "errors" is already imported there - confirm.
NOTE(review): the new guards read .Layers through m.VisionModel,
m.VisionModel.Transformer and m.GlobalTransformer; whether any of these can
be a nil pointer is not visible in this patch - verify against the struct
definitions and New().

diff --git a/model/model.go b/model/model.go
index 89b6c803b..7b2bdf89c 100644
--- a/model/model.go
+++ b/model/model.go
@@ -22,6 +22,8 @@ import (
 	"github.com/ollama/ollama/model/input"
 )
 
+var ErrNoVisionModel = errors.New("vision model not found")
+
 // Model implements a specific model architecture, defining the forward pass and any model-specific configuration
 type Model interface {
 	Forward(ml.Context, input.Options) (ml.Tensor, error)
diff --git a/model/models/gemma3/model.go b/model/models/gemma3/model.go
index b5311f187..24193f15f 100644
--- a/model/models/gemma3/model.go
+++ b/model/models/gemma3/model.go
@@ -84,6 +84,10 @@ func New(c ml.Config) (model.Model, error) {
 }
 
 func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) (any, error) {
+	if len(m.VisionModel.Layers) == 0 {
+		return nil, model.ErrNoVisionModel
+	}
+
 	image, _, err := image.Decode(bytes.NewReader(multimodalData))
 	if err != nil {
 		return nil, err
diff --git a/model/models/mllama/model.go b/model/models/mllama/model.go
index 31ba15dfd..071d77ac7 100644
--- a/model/models/mllama/model.go
+++ b/model/models/mllama/model.go
@@ -63,6 +63,10 @@ func New(c ml.Config) (model.Model, error) {
 }
 
 func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) (any, error) {
+	if len(m.VisionModel.Transformer.Layers) == 0 || len(m.GlobalTransformer.Layers) == 0 {
+		return nil, model.ErrNoVisionModel
+	}
+
 	image, _, err := image.Decode(bytes.NewReader(multimodalData))
 	if err != nil {
 		return nil, err