mirror of
https://github.com/ollama/ollama.git
synced 2025-09-06 05:01:52 +02:00
fix pixel values padding (#10718)
* panic if trying to pad 4d
* fix pixel values padding
This commit is contained in:
@@ -915,6 +915,8 @@ func (t *Tensor) RMSNorm(ctx ml.Context, w ml.Tensor, eps float32) ml.Tensor {
|
|||||||
func (t *Tensor) Pad(ctx ml.Context, shape ...int) ml.Tensor {
|
func (t *Tensor) Pad(ctx ml.Context, shape ...int) ml.Tensor {
|
||||||
if len(shape) != 4 {
|
if len(shape) != 4 {
|
||||||
panic("expected 4 dimensions")
|
panic("expected 4 dimensions")
|
||||||
|
} else if shape[3] != 0 {
|
||||||
|
panic("cuda does not support 4d tensors")
|
||||||
}
|
}
|
||||||
|
|
||||||
return &Tensor{
|
return &Tensor{
|
||||||
|
@@ -3,6 +3,7 @@ package mllama
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"image"
|
"image"
|
||||||
|
"slices"
|
||||||
|
|
||||||
"github.com/ollama/ollama/fs"
|
"github.com/ollama/ollama/fs"
|
||||||
"github.com/ollama/ollama/kvcache"
|
"github.com/ollama/ollama/kvcache"
|
||||||
@@ -73,13 +74,17 @@ func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) (any, er
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
pixelValues, err := ctx.Input().FromFloatSlice(f32s, m.imageSize, m.imageSize, m.numChannels, ratio.numTiles())
|
if ratio.numTiles() < m.maxNumTiles {
|
||||||
|
// Pad tiles to maxNumTiles
|
||||||
|
f32s = slices.Grow(f32s, m.imageSize*m.imageSize*m.numChannels*m.maxNumTiles)
|
||||||
|
f32s = f32s[:m.imageSize*m.imageSize*m.numChannels*m.maxNumTiles]
|
||||||
|
}
|
||||||
|
|
||||||
|
pixelValues, err := ctx.Input().FromFloatSlice(f32s, m.imageSize, m.imageSize, m.numChannels, m.maxNumTiles)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
pixelValues = pixelValues.Pad(ctx, 0, 0, 0, m.ImageProcessor.maxNumTiles-ratio.numTiles())
|
|
||||||
|
|
||||||
aspectRatio, err := ctx.Input().FromIntSlice([]int32{int32(ratio.rank)}, 1)
|
aspectRatio, err := ctx.Input().FromIntSlice([]int32{int32(ratio.rank)}, 1)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
Reference in New Issue
Block a user