From 863ba57477475e78df50d8942d08662483e33fc6 Mon Sep 17 00:00:00 2001
From: Michael Yang
Date: Mon, 24 Mar 2025 12:16:58 -0700
Subject: [PATCH] fixes

---
 envconfig/config.go                   |  2 +-
 ml/backend.go                         |  1 +
 ml/backend/ggml/ggml.go               | 11 +++++++++--
 model/models/mistral3/model.go        | 11 +++++------
 model/models/mistral3/model_text.go   |  4 +---
 model/models/mistral3/model_vision.go |  4 ++--
 6 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/envconfig/config.go b/envconfig/config.go
index fc702198f..a94dfc54c 100644
--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -169,7 +169,7 @@ var (
 	// Enable the new Ollama engine
 	NewEngine = Bool("OLLAMA_NEW_ENGINE")
 	// ContextLength sets the default context length
-	ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 2048)
+	ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 8<<10)
 )
 
 func String(s string) func() string {
diff --git a/ml/backend.go b/ml/backend.go
index 31670ee07..14cc0cc3c 100644
--- a/ml/backend.go
+++ b/ml/backend.go
@@ -165,6 +165,7 @@ type Tensor interface {
 	Concat(ctx Context, t2 Tensor, dim int) Tensor
 	Rows(ctx Context, t2 Tensor) Tensor
 	Copy(ctx Context, t2 Tensor) Tensor
+	Duplicate(ctx Context) Tensor
 }
 
 // ScaledDotProductAttention implements a fused attention
diff --git a/ml/backend/ggml/ggml.go b/ml/backend/ggml/ggml.go
index 3558c7ad3..9fcede6a9 100644
--- a/ml/backend/ggml/ggml.go
+++ b/ml/backend/ggml/ggml.go
@@ -986,10 +986,10 @@ func (t *Tensor) RoPEMulti(ctx ml.Context, positionIDs, ropeFactors ml.Tensor, r
 	}
 }
 
-func (t *Tensor) IM2Col(ctx ml.Context, weight ml.Tensor, s0, s1, p0, p1, d0, d1 int) ml.Tensor {
+func (t *Tensor) IM2Col(ctx ml.Context, t2 ml.Tensor, s0, s1, p0, p1, d0, d1 int) ml.Tensor {
 	return &Tensor{
 		b: t.b,
-		t: C.ggml_im2col(ctx.(*Context).ctx, t.t, weight.(*Tensor).t, C.int(s0), C.int(s1), C.int(p0), C.int(p1), C.int(d0), C.int(d1), true, C.GGML_TYPE_F32),
+		t: C.ggml_im2col(ctx.(*Context).ctx, t.t, t2.(*Tensor).t, C.int(s0), C.int(s1), C.int(p0), C.int(p1), C.int(d0), C.int(d1), true, C.GGML_TYPE_F32),
 	}
 }
 
@@ -1061,3 +1061,10 @@ func (t *Tensor) ScaledDotProductAttention(ctx ml.Context, key, value, mask ml.T
 		return kqv.Permute(ctx, 0, 2, 1, 3).Contiguous(ctx)
 	}
 }
+
+func (t *Tensor) Duplicate(ctx ml.Context) ml.Tensor {
+	return &Tensor{
+		b: t.b,
+		t: C.ggml_dup(ctx.(*Context).ctx, t.t),
+	}
+}
diff --git a/model/models/mistral3/model.go b/model/models/mistral3/model.go
index c66d162cc..c2db3335f 100644
--- a/model/models/mistral3/model.go
+++ b/model/models/mistral3/model.go
@@ -65,10 +65,9 @@ func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) (any, er
 	features, size := m.MultiModalProjector.Forward(ctx, visionOutputs, size)
 
 	// split into patches to be sent to the text transformer
-	var rows []ml.Tensor
-	for i := 0; i < size.Y; i++ {
-		view := features.View(ctx, features.Dim(0)*i, features.Dim(0), features.Stride(1), size.X)
-		rows = append(rows, view)
+	rows := make([]ml.Tensor, size.Y)
+	for i := range rows {
+		rows[i] = features.View(ctx, features.Stride(1)*(i+size.X), features.Dim(0), features.Stride(1), size.X)
 	}
 
 	return rows, nil
@@ -88,8 +87,8 @@ func (m *Model) PostTokenize(inputs []input.Input) ([]input.Input, error) {
 		} else {
 			inputMultimodal := inp.Multimodal.([]ml.Tensor)
 			for i, row := range inputMultimodal {
-				result = append(result, input.Input{Multimodal: row, MultimodalHash: inp.MultimodalHash, SameBatch: row.Dim(1)}) // Image data
-				result = append(result, slices.Repeat([]input.Input{{Token: 10}}, row.Dim(1))...)                               // [IMG]
+				result = append(result, input.Input{Token: 10, Multimodal: row, MultimodalHash: inp.MultimodalHash, SameBatch: row.Dim(1)}) // Image data
+				result = append(result, slices.Repeat([]input.Input{{Token: 10}}, row.Dim(1)-1)...)                                         // [IMG]
 				if i == len(inputMultimodal)-1 {
 					result = append(result, input.Input{Token: 13}) // [IMG_END]
 				} else {
diff --git a/model/models/mistral3/model_text.go b/model/models/mistral3/model_text.go
index b70eafe19..f280b1340 100644
--- a/model/models/mistral3/model_text.go
+++ b/model/models/mistral3/model_text.go
@@ -106,14 +106,12 @@ func (l *Layer) Forward(ctx ml.Context, hiddenState, positionIDs, outputs ml.Ten
 }
 
 func (m *TextModel) Forward(ctx ml.Context, inputs, positions, outputs ml.Tensor, batch input.Batch, cache kvcache.Cache) ml.Tensor {
-	hiddenState := m.TokenEmbedding.Forward(ctx, inputs)
+	hiddenState := m.TokenEmbedding.Forward(ctx, inputs).Duplicate(ctx)
 
 	// image embeddings
 	for _, image := range batch.Multimodal {
 		visionOutputs := image.Multimodal.(ml.Tensor)
 		// TODO (jmorganca): this fails on metal
-		// TODO (jmorganca): should this be image.Index*hiddenState.Dim(0)
-		// instead of image.Index*hiddenState.Stride(1)?
 		ctx.Forward(visionOutputs.Copy(ctx, hiddenState.View(ctx, image.Index*hiddenState.Stride(1), visionOutputs.Dim(0)*visionOutputs.Dim(1))))
 	}
 
diff --git a/model/models/mistral3/model_vision.go b/model/models/mistral3/model_vision.go
index ee98a2caf..ac1fbe18a 100644
--- a/model/models/mistral3/model_vision.go
+++ b/model/models/mistral3/model_vision.go
@@ -16,8 +16,8 @@ type PatchMerger struct {
 
 func (pm *PatchMerger) Forward(ctx ml.Context, visionOutputs ml.Tensor, size image.Point, spatialMergeSize int) ml.Tensor {
 	d := visionOutputs.Dim(0)
-	imageGrid := visionOutputs.Reshape(ctx, size.Y, size.X, d)
-	kernel := ctx.Input().Empty(ml.DTypeF32, spatialMergeSize, spatialMergeSize, d, 1)
+	imageGrid := visionOutputs.Permute(ctx, 1, 0, 2, 3).Contiguous(ctx).Reshape(ctx, size.X, size.Y, d)
+	kernel := ctx.Input().Empty(ml.DTypeF32, spatialMergeSize, spatialMergeSize, d)
 	patches := kernel.IM2Col(ctx, imageGrid, spatialMergeSize, spatialMergeSize, 0, 0, 1, 1)
 	reshaped := patches.Reshape(ctx, d*spatialMergeSize*spatialMergeSize, patches.Dim(1)*patches.Dim(2))
 	return pm.MergingLayer.Forward(ctx, reshaped)
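
For context on the PostTokenize hunk above: the first [IMG] token (id 10) now carries the image row itself and sets SameBatch to the full row width, and only row.Dim(1)-1 additional [IMG] placeholders follow, so each row still occupies exactly row.Dim(1) token positions before the end-of-row marker. Below is a minimal, hypothetical sketch of the resulting layout for a single row; Input is a simplified stand-in for the real input.Input type, and rowWidth stands in for row.Dim(1).

package main

import (
	"fmt"
	"slices"
)

// Simplified stand-in for the input.Input struct in the patch;
// the multimodal payload is reduced to a bool flag purely for illustration.
type Input struct {
	Token     int32
	HasImage  bool
	SameBatch int
}

func main() {
	const rowWidth = 4 // hypothetical row.Dim(1)

	var result []Input
	// First [IMG] token (id 10) carries the image row and pins the whole
	// row into the same batch via SameBatch.
	result = append(result, Input{Token: 10, HasImage: true, SameBatch: rowWidth})
	// The remaining rowWidth-1 positions are plain [IMG] placeholders.
	result = append(result, slices.Repeat([]Input{{Token: 10}}, rowWidth-1)...)
	// The last row of an image is terminated with [IMG_END] (id 13).
	result = append(result, Input{Token: 13})

	fmt.Printf("%d inputs for one row of width %d\n", len(result), rowWidth) // 5 inputs for width 4
}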