mirror of
https://github.com/ollama/ollama.git
synced 2025-04-12 13:49:43 +02:00
fixes
This commit is contained in:
parent
dce7cf2a1a
commit
863ba57477
@ -169,7 +169,7 @@ var (
|
||||
// Enable the new Ollama engine
|
||||
NewEngine = Bool("OLLAMA_NEW_ENGINE")
|
||||
// ContextLength sets the default context length
|
||||
ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 2048)
|
||||
ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 8<<10)
|
||||
)
|
||||
|
||||
func String(s string) func() string {
|
||||
|
@ -165,6 +165,7 @@ type Tensor interface {
|
||||
Concat(ctx Context, t2 Tensor, dim int) Tensor
|
||||
Rows(ctx Context, t2 Tensor) Tensor
|
||||
Copy(ctx Context, t2 Tensor) Tensor
|
||||
Duplicate(ctx Context) Tensor
|
||||
}
|
||||
|
||||
// ScaledDotProductAttention implements a fused attention
|
||||
|
@ -986,10 +986,10 @@ func (t *Tensor) RoPEMulti(ctx ml.Context, positionIDs, ropeFactors ml.Tensor, r
|
||||
}
|
||||
}
|
||||
|
||||
func (t *Tensor) IM2Col(ctx ml.Context, weight ml.Tensor, s0, s1, p0, p1, d0, d1 int) ml.Tensor {
|
||||
func (t *Tensor) IM2Col(ctx ml.Context, t2 ml.Tensor, s0, s1, p0, p1, d0, d1 int) ml.Tensor {
|
||||
return &Tensor{
|
||||
b: t.b,
|
||||
t: C.ggml_im2col(ctx.(*Context).ctx, t.t, weight.(*Tensor).t, C.int(s0), C.int(s1), C.int(p0), C.int(p1), C.int(d0), C.int(d1), true, C.GGML_TYPE_F32),
|
||||
t: C.ggml_im2col(ctx.(*Context).ctx, t.t, t2.(*Tensor).t, C.int(s0), C.int(s1), C.int(p0), C.int(p1), C.int(d0), C.int(d1), true, C.GGML_TYPE_F32),
|
||||
}
|
||||
}
|
||||
|
||||
@ -1061,3 +1061,10 @@ func (t *Tensor) ScaledDotProductAttention(ctx ml.Context, key, value, mask ml.T
|
||||
return kqv.Permute(ctx, 0, 2, 1, 3).Contiguous(ctx)
|
||||
}
|
||||
}
|
||||
|
||||
// Duplicate returns a new Tensor that keeps this tensor's backend (b) and wraps
// the node produced by calling ggml_dup on t.t in the given context.
//
// ctx is type-asserted to *Context without a check, so passing any other
// ml.Context implementation panics.
// NOTE(review): presumably ggml_dup yields an independent copy of the tensor
// data once the graph is computed — confirm against ggml.h.
func (t *Tensor) Duplicate(ctx ml.Context) ml.Tensor {
|
||||
return &Tensor{
|
||||
b: t.b,
|
||||
t: C.ggml_dup(ctx.(*Context).ctx, t.t),
|
||||
}
|
||||
}
|
||||
|
@ -65,10 +65,9 @@ func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) (any, er
|
||||
features, size := m.MultiModalProjector.Forward(ctx, visionOutputs, size)
|
||||
|
||||
// split into patches to be sent to the text transformer
|
||||
var rows []ml.Tensor
|
||||
for i := 0; i < size.Y; i++ {
|
||||
view := features.View(ctx, features.Dim(0)*i, features.Dim(0), features.Stride(1), size.X)
|
||||
rows = append(rows, view)
|
||||
rows := make([]ml.Tensor, size.Y)
|
||||
for i := range rows {
|
||||
rows[i] = features.View(ctx, features.Stride(1)*(i+size.X), features.Dim(0), features.Stride(1), size.X)
|
||||
}
|
||||
|
||||
return rows, nil
|
||||
@ -88,8 +87,8 @@ func (m *Model) PostTokenize(inputs []input.Input) ([]input.Input, error) {
|
||||
} else {
|
||||
inputMultimodal := inp.Multimodal.([]ml.Tensor)
|
||||
for i, row := range inputMultimodal {
|
||||
result = append(result, input.Input{Multimodal: row, MultimodalHash: inp.MultimodalHash, SameBatch: row.Dim(1)}) // Image data
|
||||
result = append(result, slices.Repeat([]input.Input{{Token: 10}}, row.Dim(1))...) // [IMG]
|
||||
result = append(result, input.Input{Token: 10, Multimodal: row, MultimodalHash: inp.MultimodalHash, SameBatch: row.Dim(1)}) // Image data
|
||||
result = append(result, slices.Repeat([]input.Input{{Token: 10}}, row.Dim(1)-1)...) // [IMG]
|
||||
if i == len(inputMultimodal)-1 {
|
||||
result = append(result, input.Input{Token: 13}) // [IMG_END]
|
||||
} else {
|
||||
|
@ -106,14 +106,12 @@ func (l *Layer) Forward(ctx ml.Context, hiddenState, positionIDs, outputs ml.Ten
|
||||
}
|
||||
|
||||
func (m *TextModel) Forward(ctx ml.Context, inputs, positions, outputs ml.Tensor, batch input.Batch, cache kvcache.Cache) ml.Tensor {
|
||||
hiddenState := m.TokenEmbedding.Forward(ctx, inputs)
|
||||
hiddenState := m.TokenEmbedding.Forward(ctx, inputs).Duplicate(ctx)
|
||||
|
||||
// image embeddings
|
||||
for _, image := range batch.Multimodal {
|
||||
visionOutputs := image.Multimodal.(ml.Tensor)
|
||||
// TODO (jmorganca): this fails on metal
|
||||
// TODO (jmorganca): should this be image.Index*hiddenState.Dim(0)
|
||||
// instead of image.Index*hiddenState.Stride(1)?
|
||||
ctx.Forward(visionOutputs.Copy(ctx, hiddenState.View(ctx, image.Index*hiddenState.Stride(1), visionOutputs.Dim(0)*visionOutputs.Dim(1))))
|
||||
}
|
||||
|
||||
|
@ -16,8 +16,8 @@ type PatchMerger struct {
|
||||
|
||||
func (pm *PatchMerger) Forward(ctx ml.Context, visionOutputs ml.Tensor, size image.Point, spatialMergeSize int) ml.Tensor {
|
||||
d := visionOutputs.Dim(0)
|
||||
imageGrid := visionOutputs.Reshape(ctx, size.Y, size.X, d)
|
||||
kernel := ctx.Input().Empty(ml.DTypeF32, spatialMergeSize, spatialMergeSize, d, 1)
|
||||
imageGrid := visionOutputs.Permute(ctx, 1, 0, 2, 3).Contiguous(ctx).Reshape(ctx, size.X, size.Y, d)
|
||||
kernel := ctx.Input().Empty(ml.DTypeF32, spatialMergeSize, spatialMergeSize, d)
|
||||
patches := kernel.IM2Col(ctx, imageGrid, spatialMergeSize, spatialMergeSize, 0, 0, 1, 1)
|
||||
reshaped := patches.Reshape(ctx, d*spatialMergeSize*spatialMergeSize, patches.Dim(1)*patches.Dim(2))
|
||||
return pm.MergingLayer.Forward(ctx, reshaped)
|
||||
|
Loading…
x
Reference in New Issue
Block a user