mirror of
https://github.com/ollama/ollama.git
synced 2025-04-13 14:19:23 +02:00
model: support for mistral-small in the ollama runner
Mistral is a popular research lab making open source models. This updates the forward pass of llama architecture models to support both llama models and mistral models by accounting for additional metadata present in mistral models, and finding the correct dimensions for the output projection.
This commit is contained in:
parent
1861fbdeb5
commit
3a9c7145e4
@ -182,8 +182,10 @@ func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
|
||||
|
||||
var conv ModelConverter
|
||||
switch p.Architectures[0] {
|
||||
case "LlamaForCausalLM", "MistralForCausalLM":
|
||||
case "LlamaForCausalLM":
|
||||
conv = &llamaModel{}
|
||||
case "Mistral3ForConditionalGeneration":
|
||||
conv = &mistral3Model{}
|
||||
case "MixtralForCausalLM":
|
||||
conv = &mixtralModel{}
|
||||
case "GemmaForCausalLM":
|
||||
|
190
convert/convert_mistral.go
Normal file
190
convert/convert_mistral.go
Normal file
@ -0,0 +1,190 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type mistral3Model struct {
|
||||
ModelParameters
|
||||
ImageTokenIndex uint32 `json:"image_token_index"`
|
||||
SpatialMergeSize uint32 `json:"spatial_merge_size"`
|
||||
VisionFeatureLayer int32 `json:"vision_feature_layer"`
|
||||
TextModel struct {
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||
HeadDim uint32 `json:"head_dim"`
|
||||
SlidingWindow *uint32 `json:"sliding_window"`
|
||||
HiddenAct string `json:"hidden_act"`
|
||||
VocabSize uint32 `json:"vocab_size"`
|
||||
} `json:"text_config"`
|
||||
VisionModel struct {
|
||||
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||
HiddenSize uint32 `json:"hidden_size"`
|
||||
IntermediateSize uint32 `json:"intermediate_size"`
|
||||
ImageSize uint32 `json:"image_size"`
|
||||
NumChannels uint32 `json:"num_channels"`
|
||||
PatchSize uint32 `json:"patch_size"`
|
||||
HeadDim uint32 `json:"head_dim"`
|
||||
HiddenAct string `json:"hidden_act"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
} `json:"vision_config"`
|
||||
MultiModalProjectorBias bool `json:"multimodal_projector_bias"`
|
||||
ProjectorHiddenAct string `json:"projector_hidden_act"`
|
||||
}
|
||||
|
||||
func (p *mistral3Model) KV(t *Tokenizer) ggml.KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "mistral3"
|
||||
kv["mistral3.vocab_size"] = p.TextModel.VocabSize
|
||||
|
||||
// Text configuration
|
||||
kv["mistral3.block_count"] = p.TextModel.NumHiddenLayers
|
||||
kv["mistral3.context_length"] = p.TextModel.MaxPositionEmbeddings
|
||||
kv["mistral3.embedding_length"] = p.TextModel.HiddenSize
|
||||
kv["mistral3.feed_forward_length"] = p.TextModel.IntermediateSize
|
||||
kv["mistral3.attention.head_count"] = p.TextModel.NumAttentionHeads
|
||||
kv["mistral3.attention.head_count_kv"] = p.TextModel.NumKeyValueHeads
|
||||
kv["mistral3.attention.layer_norm_rms_epsilon"] = p.TextModel.RMSNormEPS
|
||||
kv["mistral3.attention.key_length"] = p.TextModel.HeadDim
|
||||
kv["mistral3.attention.value_length"] = p.TextModel.HeadDim
|
||||
kv["mistral3.rope.dimension_count"] = p.TextModel.HiddenSize / p.TextModel.NumHiddenLayers
|
||||
kv["mistral3.rope.freq_base"] = p.TextModel.RopeTheta
|
||||
|
||||
// Vision configuration
|
||||
kv["mistral3.vision.block_count"] = p.VisionModel.NumHiddenLayers
|
||||
kv["mistral3.vision.embedding_length"] = p.VisionModel.HiddenSize
|
||||
kv["mistral3.vision.feed_forward_length"] = p.VisionModel.IntermediateSize
|
||||
kv["mistral3.vision.attention.head_count"] = p.VisionModel.NumAttentionHeads
|
||||
kv["mistral3.vision.attention.key_length"] = p.VisionModel.HeadDim
|
||||
kv["mistral3.vision.image_size"] = p.VisionModel.ImageSize
|
||||
kv["mistral3.vision.patch_size"] = p.VisionModel.PatchSize
|
||||
kv["mistral3.vision.num_channels"] = p.VisionModel.NumChannels
|
||||
// kv["mistral3.vision.attention.layer_norm_epsilon"] = 1e-05 // Default value
|
||||
kv["mistral3.vision.rope.freq_base"] = p.VisionModel.RopeTheta
|
||||
|
||||
// Multimodal configuration
|
||||
kv["mistral3.image_token_index"] = p.ImageTokenIndex
|
||||
kv["mistral3.spatial_merge_size"] = p.SpatialMergeSize
|
||||
|
||||
kv["mistral3.mm.projector_bias"] = p.MultiModalProjectorBias
|
||||
|
||||
if p.ProjectorHiddenAct != "" {
|
||||
kv["mistral3.mm.projector_hidden_act"] = p.ProjectorHiddenAct
|
||||
}
|
||||
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *mistral3Model) Tensors(ts []Tensor) []ggml.Tensor {
|
||||
var out []ggml.Tensor
|
||||
|
||||
for _, t := range ts {
|
||||
if !strings.HasPrefix(t.Name(), "v.") {
|
||||
if strings.HasSuffix(t.Name(), ".attn_q.weight") ||
|
||||
strings.HasSuffix(t.Name(), ".attn_k.weight") {
|
||||
t.SetRepacker(p.repack)
|
||||
}
|
||||
}
|
||||
|
||||
out = append(out, ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
WriterTo: t,
|
||||
})
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
func (p *mistral3Model) Replacements() []string {
|
||||
return []string{
|
||||
"language_model.model.norm", "output_norm",
|
||||
"language_model.model.", "",
|
||||
"language_model.", "",
|
||||
"layers", "blk",
|
||||
"transformer.layers", "blk",
|
||||
"vision_tower", "v",
|
||||
"ln_pre", "encoder_norm",
|
||||
"input_layernorm", "attn_norm",
|
||||
"post_attention_layernorm", "ffn_norm",
|
||||
"embed_tokens", "token_embd",
|
||||
"self_attn.q_proj", "attn_q",
|
||||
"self_attn.k_proj", "attn_k",
|
||||
"self_attn.v_proj", "attn_v",
|
||||
"self_attn.o_proj", "attn_output",
|
||||
"mlp.down_proj", "ffn_down",
|
||||
"mlp.gate_proj", "ffn_gate",
|
||||
"mlp.up_proj", "ffn_up",
|
||||
"attention.q_proj", "attn_q",
|
||||
"attention.k_proj", "attn_k",
|
||||
"attention.v_proj", "attn_v",
|
||||
"attention.o_proj", "attn_output",
|
||||
"attention_norm", "attn_norm",
|
||||
"feed_forward.gate_proj", "ffn_gate",
|
||||
"feed_forward.down_proj", "ffn_down",
|
||||
"feed_forward.up_proj", "ffn_up",
|
||||
"multi_modal_projector", "mm",
|
||||
"ffn_norm", "ffn_norm",
|
||||
"lm_head", "output",
|
||||
}
|
||||
}
|
||||
|
||||
func (p *mistral3Model) repack(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||
var dims []int
|
||||
for _, dim := range shape {
|
||||
dims = append(dims, int(dim))
|
||||
}
|
||||
|
||||
var heads uint32
|
||||
if strings.HasSuffix(name, ".attn_q.weight") {
|
||||
heads = p.TextModel.NumAttentionHeads
|
||||
} else if strings.HasSuffix(name, ".attn_k.weight") {
|
||||
heads = cmp.Or(p.TextModel.NumKeyValueHeads, p.TextModel.NumAttentionHeads)
|
||||
} else {
|
||||
return nil, fmt.Errorf("unknown tensor for repack: %s", name)
|
||||
}
|
||||
|
||||
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||
if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.T(0, 2, 1, 3); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Reshape(dims...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := n.Transpose(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ts, err := native.SelectF32(n, 1)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var f32s []float32
|
||||
for _, t := range ts {
|
||||
f32s = append(f32s, t...)
|
||||
}
|
||||
|
||||
return f32s, nil
|
||||
}
|
@ -62,10 +62,7 @@ func parseTensors(fsys fs.FS, replacer *strings.Replacer) ([]Tensor, error) {
|
||||
Pattern string
|
||||
Func func(fs.FS, *strings.Replacer, ...string) ([]Tensor, error)
|
||||
}{
|
||||
{"model-*-of-*.safetensors", parseSafetensors},
|
||||
{"model.safetensors", parseSafetensors},
|
||||
{"adapters.safetensors", parseSafetensors},
|
||||
{"adapter_model.safetensors", parseSafetensors},
|
||||
{"*.safetensors", parseSafetensors},
|
||||
{"pytorch_model-*-of-*.bin", parseTorch},
|
||||
{"pytorch_model.bin", parseTorch},
|
||||
{"consolidated.*.pth", parseTorch},
|
||||
|
@ -134,7 +134,10 @@ func (kv KV) Floats(key string, defaultValue ...[]float32) []float32 {
|
||||
}
|
||||
|
||||
func (kv KV) OllamaEngineRequired() bool {
|
||||
return kv.Architecture() == "gemma3"
|
||||
return slices.Contains([]string{
|
||||
"gemma3",
|
||||
"mistral3",
|
||||
}, kv.Architecture())
|
||||
}
|
||||
|
||||
func keyValue[T string | uint32 | uint64 | float32 | *array | bool](kv KV, key string, defaultValue ...T) T {
|
||||
@ -638,7 +641,7 @@ func (llm GGML) VisionGraphSize() (weights, graphSize uint64) {
|
||||
embeddingLength*numPatches*maxNumTiles +
|
||||
9*embeddingLength*numPaddedPatches*maxNumTiles +
|
||||
numPaddedPatches*maxNumTiles*numPaddedPatches*maxNumTiles*headCount)
|
||||
case "gemma3":
|
||||
case "gemma3", "mistral3":
|
||||
graphSize = 4 * (imageSize*imageSize*numChannels +
|
||||
embeddingLength*patchSize +
|
||||
numPatches*numPatches*headCount)
|
||||
|
@ -484,6 +484,14 @@ func (t *testTensor) Floats() []float32 {
|
||||
return out
|
||||
}
|
||||
|
||||
func (t *testTensor) Neg(ctx ml.Context) ml.Tensor {
|
||||
out := ctx.Empty(t.DType(), t.Shape()...).(*testTensor)
|
||||
for i := range out.data {
|
||||
out.data[i] = -t.data[i]
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (t *testTensor) Add(ctx ml.Context, t2 ml.Tensor) ml.Tensor {
|
||||
out := ctx.Empty(t.DType(), t.Shape()...).(*testTensor)
|
||||
|
||||
@ -538,17 +546,15 @@ func (t *testTensor) RoPE(ctx ml.Context, positionIDs, ropeFactors ml.Tensor, di
|
||||
panic("not implemented")
|
||||
}
|
||||
|
||||
func (t *testTensor) Tanh(ctx ml.Context) ml.Tensor {
|
||||
func (t *testTensor) IM2Col(ctx ml.Context, weight ml.Tensor, s0, s1, p0, p1, d0, d1 int) ml.Tensor {
|
||||
panic("not implemented")
|
||||
}
|
||||
|
||||
func (t *testTensor) GELU(ctx ml.Context) ml.Tensor {
|
||||
panic("not implemented")
|
||||
}
|
||||
|
||||
func (t *testTensor) SILU(ctx ml.Context) ml.Tensor {
|
||||
panic("not implemented")
|
||||
}
|
||||
func (t *testTensor) Cos(ctx ml.Context) ml.Tensor { panic("not implemented") }
|
||||
func (t *testTensor) Sin(ctx ml.Context) ml.Tensor { panic("not implemented") }
|
||||
func (t *testTensor) Tanh(ctx ml.Context) ml.Tensor { panic("not implemented") }
|
||||
func (t *testTensor) GELU(ctx ml.Context) ml.Tensor { panic("not implemented") }
|
||||
func (t *testTensor) SILU(ctx ml.Context) ml.Tensor { panic("not implemented") }
|
||||
|
||||
func (t *testTensor) Reshape(ctx ml.Context, shape ...int) ml.Tensor {
|
||||
panic("not implemented")
|
||||
@ -600,6 +606,8 @@ func (t *testTensor) Stack(ctx ml.Context, dim int, s ...ml.Tensor) ml.Tensor {
|
||||
panic("not implemented")
|
||||
}
|
||||
|
||||
func (t *testTensor) Repeat(ctx ml.Context, dim, n int) ml.Tensor { panic("not implemented") }
|
||||
|
||||
func (t *testTensor) Concat(ctx ml.Context, t2 ml.Tensor, dim int) ml.Tensor {
|
||||
panic("not implemented")
|
||||
}
|
||||
@ -612,3 +620,5 @@ func (t *testTensor) Copy(ctx ml.Context, t2 ml.Tensor) ml.Tensor {
|
||||
copy(t2.(*testTensor).data, t.data)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *testTensor) Duplicate(ctx ml.Context) ml.Tensor { panic("not implemented") }
|
||||
|
17
llama/llama.cpp/src/llama-arch.cpp
vendored
17
llama/llama.cpp/src/llama-arch.cpp
vendored
@ -65,6 +65,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
|
||||
{ LLM_ARCH_CHAMELEON, "chameleon" },
|
||||
{ LLM_ARCH_SOLAR, "solar" },
|
||||
{ LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
|
||||
{ LLM_ARCH_MISTRAL3, "mistral3" },
|
||||
{ LLM_ARCH_UNKNOWN, "(unknown)" },
|
||||
};
|
||||
|
||||
@ -1371,6 +1372,22 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
||||
{ LLM_TENSOR_POS_NET_ATTN_OUT, "posnet.%d.attn_output" },
|
||||
},
|
||||
},
|
||||
{
|
||||
LLM_ARCH_MISTRAL3,
|
||||
{
|
||||
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
||||
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
||||
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
||||
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
||||
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
||||
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
||||
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
||||
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
||||
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
||||
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
||||
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
||||
}
|
||||
},
|
||||
{
|
||||
LLM_ARCH_UNKNOWN,
|
||||
{
|
||||
|
1
llama/llama.cpp/src/llama-arch.h
vendored
1
llama/llama.cpp/src/llama-arch.h
vendored
@ -69,6 +69,7 @@ enum llm_arch {
|
||||
LLM_ARCH_CHAMELEON,
|
||||
LLM_ARCH_SOLAR,
|
||||
LLM_ARCH_WAVTOKENIZER_DEC,
|
||||
LLM_ARCH_MISTRAL3,
|
||||
LLM_ARCH_UNKNOWN,
|
||||
};
|
||||
|
||||
|
3
llama/llama.cpp/src/llama-model.cpp
vendored
3
llama/llama.cpp/src/llama-model.cpp
vendored
@ -1277,6 +1277,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
|
||||
ml.get_key(LLM_KV_ATTENTION_GROUPNORM_GROUPS, hparams.n_norm_groups);
|
||||
ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
|
||||
} break;
|
||||
case LLM_ARCH_MISTRAL3: break;
|
||||
default: throw std::runtime_error("unsupported model architecture");
|
||||
}
|
||||
|
||||
@ -3537,6 +3538,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
|
||||
output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {hparams.convnext.n_embd, n_embd}, 0);
|
||||
output_b = create_tensor(tn(LLM_TENSOR_OUTPUT, "bias"), {n_embd}, 0);
|
||||
} break;
|
||||
case LLM_ARCH_MISTRAL3: break;
|
||||
default:
|
||||
throw std::runtime_error("unknown architecture");
|
||||
}
|
||||
@ -4015,6 +4017,7 @@ enum llama_rope_type llama_model_rope_type(const struct llama_model * model) {
|
||||
case LLM_ARCH_GRANITE_MOE:
|
||||
case LLM_ARCH_CHAMELEON:
|
||||
case LLM_ARCH_SOLAR:
|
||||
case LLM_ARCH_MISTRAL3:
|
||||
return LLAMA_ROPE_TYPE_NORM;
|
||||
|
||||
// the pairs of head values are offset by n_rot/2
|
||||
|
9
llama/llama.cpp/src/llama-quant.cpp
vendored
9
llama/llama.cpp/src/llama-quant.cpp
vendored
@ -738,13 +738,8 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
|
||||
bool quantize = name.rfind("weight") == name.size() - 6; // ends with 'weight'?
|
||||
|
||||
// don't quantize vision stuff
|
||||
quantize &= name.find("v.blk.") == std::string::npos;
|
||||
|
||||
quantize &= name.find("mm.mm_input_projection.weight") == std::string::npos;
|
||||
quantize &= name.find("mm.mm_soft_emb_norm.weight") == std::string::npos;
|
||||
quantize &= name.find("v.patch_embedding.weight") == std::string::npos;
|
||||
quantize &= name.find("v.position_embedding.weight") == std::string::npos;
|
||||
quantize &= name.find("v.post_layernorm.weight") == std::string::npos;
|
||||
quantize &= name.find("v.") == std::string::npos;
|
||||
quantize &= name.find("mm.") == std::string::npos;
|
||||
|
||||
// quantize only 2D and 3D tensors (experts)
|
||||
quantize &= (ggml_n_dims(tensor) >= 2);
|
||||
|
@ -1,17 +1,19 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Patrick Devine <patrick@infrahq.com>
|
||||
Date: Fri, 14 Mar 2025 16:33:23 -0700
|
||||
Subject: [PATCH] gemma3 quantization
|
||||
Subject: [PATCH] add model quantizations
|
||||
|
||||
- gemma3
|
||||
- mistral3
|
||||
---
|
||||
src/llama-arch.cpp | 19 +++++++++++++++++++
|
||||
src/llama-arch.h | 1 +
|
||||
src/llama-model.cpp | 7 +++++++
|
||||
src/llama-quant.cpp | 9 +++++++++
|
||||
4 files changed, 36 insertions(+)
|
||||
src/llama-arch.cpp | 36 ++++++++++++++++++++++++++++++++++++
|
||||
src/llama-arch.h | 2 ++
|
||||
src/llama-model.cpp | 10 ++++++++++
|
||||
src/llama-quant.cpp | 4 ++++
|
||||
4 files changed, 52 insertions(+)
|
||||
|
||||
diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp
|
||||
index b6f20286..b443fcd3 100644
|
||||
index b6f20286..13a0a988 100644
|
||||
--- a/src/llama-arch.cpp
|
||||
+++ b/src/llama-arch.cpp
|
||||
@@ -37,6 +37,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
|
||||
@ -22,7 +24,15 @@ index b6f20286..b443fcd3 100644
|
||||
{ LLM_ARCH_STARCODER2, "starcoder2" },
|
||||
{ LLM_ARCH_MAMBA, "mamba" },
|
||||
{ LLM_ARCH_XVERSE, "xverse" },
|
||||
@@ -804,6 +805,24 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
||||
@@ -64,6 +65,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
|
||||
{ LLM_ARCH_CHAMELEON, "chameleon" },
|
||||
{ LLM_ARCH_SOLAR, "solar" },
|
||||
{ LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
|
||||
+ { LLM_ARCH_MISTRAL3, "mistral3" },
|
||||
{ LLM_ARCH_UNKNOWN, "(unknown)" },
|
||||
};
|
||||
|
||||
@@ -804,6 +806,24 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
||||
{ LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
|
||||
},
|
||||
},
|
||||
@ -47,8 +57,31 @@ index b6f20286..b443fcd3 100644
|
||||
{
|
||||
LLM_ARCH_STARCODER2,
|
||||
{
|
||||
@@ -1352,6 +1372,22 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
||||
{ LLM_TENSOR_POS_NET_ATTN_OUT, "posnet.%d.attn_output" },
|
||||
},
|
||||
},
|
||||
+ {
|
||||
+ LLM_ARCH_MISTRAL3,
|
||||
+ {
|
||||
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
||||
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
||||
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
||||
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
||||
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
||||
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
||||
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
||||
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
||||
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
||||
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
||||
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
||||
+ }
|
||||
+ },
|
||||
{
|
||||
LLM_ARCH_UNKNOWN,
|
||||
{
|
||||
diff --git a/src/llama-arch.h b/src/llama-arch.h
|
||||
index ec742224..aad92a5d 100644
|
||||
index ec742224..8476ae0a 100644
|
||||
--- a/src/llama-arch.h
|
||||
+++ b/src/llama-arch.h
|
||||
@@ -41,6 +41,7 @@ enum llm_arch {
|
||||
@ -59,8 +92,16 @@ index ec742224..aad92a5d 100644
|
||||
LLM_ARCH_STARCODER2,
|
||||
LLM_ARCH_MAMBA,
|
||||
LLM_ARCH_XVERSE,
|
||||
@@ -68,6 +69,7 @@ enum llm_arch {
|
||||
LLM_ARCH_CHAMELEON,
|
||||
LLM_ARCH_SOLAR,
|
||||
LLM_ARCH_WAVTOKENIZER_DEC,
|
||||
+ LLM_ARCH_MISTRAL3,
|
||||
LLM_ARCH_UNKNOWN,
|
||||
};
|
||||
|
||||
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
|
||||
index ab1a07d1..70183041 100644
|
||||
index ab1a07d1..db4f2685 100644
|
||||
--- a/src/llama-model.cpp
|
||||
+++ b/src/llama-model.cpp
|
||||
@@ -878,6 +878,9 @@ void llama_model::load_hparams(llama_model_loader & ml) {
|
||||
@ -73,7 +114,15 @@ index ab1a07d1..70183041 100644
|
||||
case LLM_ARCH_STARCODER2:
|
||||
{
|
||||
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
|
||||
@@ -2537,6 +2540,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
|
||||
@@ -1274,6 +1277,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
|
||||
ml.get_key(LLM_KV_ATTENTION_GROUPNORM_GROUPS, hparams.n_norm_groups);
|
||||
ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
|
||||
} break;
|
||||
+ case LLM_ARCH_MISTRAL3: break;
|
||||
default: throw std::runtime_error("unsupported model architecture");
|
||||
}
|
||||
|
||||
@@ -2537,6 +2541,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
|
||||
layer.ffn_post_norm = create_tensor(tn(LLM_TENSOR_FFN_POST_NORM, "weight", i), {n_embd}, 0);
|
||||
}
|
||||
} break;
|
||||
@ -83,7 +132,23 @@ index ab1a07d1..70183041 100644
|
||||
case LLM_ARCH_STARCODER2:
|
||||
{
|
||||
tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
|
||||
@@ -4029,6 +4035,7 @@ enum llama_rope_type llama_model_rope_type(const struct llama_model * model) {
|
||||
@@ -3531,6 +3538,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
|
||||
output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {hparams.convnext.n_embd, n_embd}, 0);
|
||||
output_b = create_tensor(tn(LLM_TENSOR_OUTPUT, "bias"), {n_embd}, 0);
|
||||
} break;
|
||||
+ case LLM_ARCH_MISTRAL3: break;
|
||||
default:
|
||||
throw std::runtime_error("unknown architecture");
|
||||
}
|
||||
@@ -4009,6 +4017,7 @@ enum llama_rope_type llama_model_rope_type(const struct llama_model * model) {
|
||||
case LLM_ARCH_GRANITE_MOE:
|
||||
case LLM_ARCH_CHAMELEON:
|
||||
case LLM_ARCH_SOLAR:
|
||||
+ case LLM_ARCH_MISTRAL3:
|
||||
return LLAMA_ROPE_TYPE_NORM;
|
||||
|
||||
// the pairs of head values are offset by n_rot/2
|
||||
@@ -4029,6 +4038,7 @@ enum llama_rope_type llama_model_rope_type(const struct llama_model * model) {
|
||||
case LLM_ARCH_PHIMOE:
|
||||
case LLM_ARCH_GEMMA:
|
||||
case LLM_ARCH_GEMMA2:
|
||||
@ -92,21 +157,16 @@ index ab1a07d1..70183041 100644
|
||||
case LLM_ARCH_OPENELM:
|
||||
case LLM_ARCH_GPTNEOX:
|
||||
diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
|
||||
index 6eb1da08..d2f3a510 100644
|
||||
index 6eb1da08..ebcbafa1 100644
|
||||
--- a/src/llama-quant.cpp
|
||||
+++ b/src/llama-quant.cpp
|
||||
@@ -737,6 +737,15 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
|
||||
@@ -737,6 +737,10 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
|
||||
// This used to be a regex, but <regex> has an extreme cost to compile times.
|
||||
bool quantize = name.rfind("weight") == name.size() - 6; // ends with 'weight'?
|
||||
|
||||
+ // don't quantize vision stuff
|
||||
+ quantize &= name.find("v.blk.") == std::string::npos;
|
||||
+
|
||||
+ quantize &= name.find("mm.mm_input_projection.weight") == std::string::npos;
|
||||
+ quantize &= name.find("mm.mm_soft_emb_norm.weight") == std::string::npos;
|
||||
+ quantize &= name.find("v.patch_embedding.weight") == std::string::npos;
|
||||
+ quantize &= name.find("v.position_embedding.weight") == std::string::npos;
|
||||
+ quantize &= name.find("v.post_layernorm.weight") == std::string::npos;
|
||||
+ quantize &= name.find("v.") == std::string::npos;
|
||||
+ quantize &= name.find("mm.") == std::string::npos;
|
||||
+
|
||||
// quantize only 2D and 3D tensors (experts)
|
||||
quantize &= (ggml_n_dims(tensor) >= 2);
|
75
llama/patches/0022-metal-add-op_neg.patch
Normal file
75
llama/patches/0022-metal-add-op_neg.patch
Normal file
@ -0,0 +1,75 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Yang <git@mxy.ng>
|
||||
Date: Wed, 2 Apr 2025 15:26:15 -0700
|
||||
Subject: [PATCH] metal: add op_neg
|
||||
|
||||
---
|
||||
ggml/src/ggml-metal/ggml-metal.m | 15 +++++++++++++++
|
||||
ggml/src/ggml-metal/ggml-metal.metal | 7 +++++++
|
||||
2 files changed, 22 insertions(+)
|
||||
|
||||
diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
|
||||
index e4c093f9..d8422f1b 100644
|
||||
--- a/ggml/src/ggml-metal/ggml-metal.m
|
||||
+++ b/ggml/src/ggml-metal/ggml-metal.m
|
||||
@@ -423,6 +423,7 @@ enum ggml_metal_kernel_type {
|
||||
GGML_METAL_KERNEL_TYPE_SQRT,
|
||||
GGML_METAL_KERNEL_TYPE_SIN,
|
||||
GGML_METAL_KERNEL_TYPE_COS,
|
||||
+ GGML_METAL_KERNEL_TYPE_NEG,
|
||||
GGML_METAL_KERNEL_TYPE_SUM_ROWS,
|
||||
GGML_METAL_KERNEL_TYPE_POOL_2D_AVG_F32,
|
||||
GGML_METAL_KERNEL_TYPE_POOL_2D_MAX_F32,
|
||||
@@ -1039,6 +1040,7 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SQRT, sqrt, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SIN, sin, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_COS, cos, true);
|
||||
+ GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_NEG, neg, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SUM_ROWS, sum_rows, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ARGMAX, argmax, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_POOL_2D_AVG_F32, pool_2d_avg_f32, true);
|
||||
@@ -1202,6 +1204,7 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex
|
||||
case GGML_UNARY_OP_GELU_QUICK:
|
||||
case GGML_UNARY_OP_SILU:
|
||||
case GGML_UNARY_OP_ELU:
|
||||
+ case GGML_UNARY_OP_NEG:
|
||||
return ggml_is_contiguous(op->src[0]);
|
||||
default:
|
||||
return false;
|
||||
@@ -1873,6 +1876,18 @@ static void ggml_metal_encode_node(
|
||||
|
||||
[encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
|
||||
} break;
|
||||
+ case GGML_UNARY_OP_NEG:
|
||||
+ {
|
||||
+ id<MTLComputePipelineState> pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_NEG].pipeline;
|
||||
+
|
||||
+ [encoder setComputePipelineState:pipeline];
|
||||
+ [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
|
||||
+ [encoder setBuffer:id_dst offset:offs_dst atIndex:1];
|
||||
+
|
||||
+ const int64_t n = ggml_nelements(dst);
|
||||
+
|
||||
+ [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
|
||||
+ } break;
|
||||
default:
|
||||
{
|
||||
GGML_LOG_WARN("%s: node %3d, op = %8s not implemented\n", __func__, idx, ggml_op_name(dst->op));
|
||||
diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal
|
||||
index f38909d0..bb0ff668 100644
|
||||
--- a/ggml/src/ggml-metal/ggml-metal.metal
|
||||
+++ b/ggml/src/ggml-metal/ggml-metal.metal
|
||||
@@ -945,6 +945,13 @@ kernel void kernel_cos(
|
||||
dst[tpig] = cos(src0[tpig]);
|
||||
}
|
||||
|
||||
+kernel void kernel_neg(
|
||||
+ device const float * src0,
|
||||
+ device float * dst,
|
||||
+ uint tpig[[thread_position_in_grid]]) {
|
||||
+ dst[tpig] = -src0[tpig];
|
||||
+}
|
||||
+
|
||||
kernel void kernel_sum_rows(
|
||||
device const float * src0,
|
||||
device float * dst,
|
@ -118,6 +118,7 @@ type Tensor interface {
|
||||
Bytes() []byte
|
||||
Floats() []float32
|
||||
|
||||
Neg(ctx Context) Tensor
|
||||
Add(ctx Context, t2 Tensor) Tensor
|
||||
Mul(ctx Context, t2 Tensor) Tensor
|
||||
Mulmat(ctx Context, t2 Tensor) Tensor
|
||||
@ -132,7 +133,10 @@ type Tensor interface {
|
||||
Conv2D(ctx Context, weight Tensor, s0, s1, p0, p1, d0, d1 int) Tensor
|
||||
|
||||
RoPE(ctx Context, positionIDs, ropeFactors Tensor, dim, ropeType uint32, base, scale float32) Tensor
|
||||
IM2Col(ctx Context, weight Tensor, s0, s1, p0, p1, d0, d1 int) Tensor
|
||||
|
||||
Sin(ctx Context) Tensor
|
||||
Cos(ctx Context) Tensor
|
||||
Tanh(ctx Context) Tensor
|
||||
GELU(ctx Context) Tensor
|
||||
SILU(ctx Context) Tensor
|
||||
@ -147,9 +151,13 @@ type Tensor interface {
|
||||
Unpad(ctx Context, shape ...int) Tensor
|
||||
|
||||
Stack(ctx Context, dim int, s ...Tensor) Tensor
|
||||
|
||||
// Repeat repeats the tensor n times along dimension dim
|
||||
Repeat(ctx Context, dim, n int) Tensor
|
||||
Concat(ctx Context, t2 Tensor, dim int) Tensor
|
||||
Rows(ctx Context, t2 Tensor) Tensor
|
||||
Copy(ctx Context, t2 Tensor) Tensor
|
||||
Duplicate(ctx Context) Tensor
|
||||
}
|
||||
|
||||
// ScaledDotProductAttention implements a fused attention
|
||||
@ -214,7 +222,7 @@ func Dump(ctx Context, t Tensor, opts ...DumpOptions) string {
|
||||
return strconv.FormatFloat(float64(f), 'f', opts[0].Precision, 32)
|
||||
})
|
||||
case DTypeF16, DTypeQ80, DTypeQ40:
|
||||
f32 := ctx.Empty(DTypeF32, t.Shape()...)
|
||||
f32 := ctx.Input().Empty(DTypeF32, t.Shape()...)
|
||||
f32 = t.Copy(ctx, f32)
|
||||
return dump[[]float32](ctx, f32, opts[0].Items, func(f float32) string {
|
||||
return strconv.FormatFloat(float64(f), 'f', opts[0].Precision, 32)
|
||||
|
@ -711,6 +711,13 @@ func (t *Tensor) DType() ml.DType {
|
||||
}
|
||||
}
|
||||
|
||||
func (t *Tensor) Neg(ctx ml.Context) ml.Tensor {
|
||||
return &Tensor{
|
||||
b: t.b,
|
||||
t: C.ggml_neg(ctx.(*Context).ctx, t.t),
|
||||
}
|
||||
}
|
||||
|
||||
func (t *Tensor) Add(ctx ml.Context, t2 ml.Tensor) ml.Tensor {
|
||||
return &Tensor{
|
||||
b: t.b,
|
||||
@ -718,6 +725,27 @@ func (t *Tensor) Add(ctx ml.Context, t2 ml.Tensor) ml.Tensor {
|
||||
}
|
||||
}
|
||||
|
||||
func (t *Tensor) Repeat(ctx ml.Context, dim, n int) ml.Tensor {
|
||||
if dim < 0 || dim >= C.GGML_MAX_DIMS {
|
||||
panic("invalid dimension")
|
||||
}
|
||||
|
||||
shape := make([]C.int64_t, C.GGML_MAX_DIMS)
|
||||
for i := range C.GGML_MAX_DIMS {
|
||||
if i == dim {
|
||||
shape[i] = C.int64_t(t.Dim(i) * n)
|
||||
} else {
|
||||
shape[i] = C.int64_t(t.Dim(i))
|
||||
}
|
||||
}
|
||||
|
||||
tmpl := C.ggml_new_tensor(ctx.(*Context).ctx, t.t._type, C.int(len(shape)), unsafe.SliceData(shape))
|
||||
return &Tensor{
|
||||
b: t.b,
|
||||
t: C.ggml_repeat(ctx.(*Context).ctx, t.t, tmpl),
|
||||
}
|
||||
}
|
||||
|
||||
func (t *Tensor) Stack(ctx ml.Context, dim int, s ...ml.Tensor) ml.Tensor {
|
||||
if len(s) > 0 {
|
||||
return t.Concat(ctx, s[0].Stack(ctx, dim, s[1:]...), dim)
|
||||
@ -854,6 +882,20 @@ func (t *Tensor) Softmax(ctx ml.Context) ml.Tensor {
|
||||
}
|
||||
}
|
||||
|
||||
func (t *Tensor) Sin(ctx ml.Context) ml.Tensor {
|
||||
return &Tensor{
|
||||
b: t.b,
|
||||
t: C.ggml_sin(ctx.(*Context).ctx, t.t),
|
||||
}
|
||||
}
|
||||
|
||||
func (t *Tensor) Cos(ctx ml.Context) ml.Tensor {
|
||||
return &Tensor{
|
||||
b: t.b,
|
||||
t: C.ggml_cos(ctx.(*Context).ctx, t.t),
|
||||
}
|
||||
}
|
||||
|
||||
func (t *Tensor) Tanh(ctx ml.Context) ml.Tensor {
|
||||
return &Tensor{
|
||||
b: t.b,
|
||||
@ -942,6 +984,13 @@ func (t *Tensor) RoPE(ctx ml.Context, positionIDs, ropeFactors ml.Tensor, ropeDi
|
||||
}
|
||||
}
|
||||
|
||||
func (t *Tensor) IM2Col(ctx ml.Context, t2 ml.Tensor, s0, s1, p0, p1, d0, d1 int) ml.Tensor {
|
||||
return &Tensor{
|
||||
b: t.b,
|
||||
t: C.ggml_im2col(ctx.(*Context).ctx, t.t, t2.(*Tensor).t, C.int(s0), C.int(s1), C.int(p0), C.int(p1), C.int(d0), C.int(d1), true, C.GGML_TYPE_F32),
|
||||
}
|
||||
}
|
||||
|
||||
func (t *Tensor) GELU(ctx ml.Context) ml.Tensor {
|
||||
return &Tensor{
|
||||
b: t.b,
|
||||
@ -1010,3 +1059,10 @@ func (t *Tensor) ScaledDotProductAttention(ctx ml.Context, key, value, mask ml.T
|
||||
return kqv.Permute(ctx, 0, 2, 1, 3).Contiguous(ctx)
|
||||
}
|
||||
}
|
||||
|
||||
func (t *Tensor) Duplicate(ctx ml.Context) ml.Tensor {
|
||||
return &Tensor{
|
||||
b: t.b,
|
||||
t: C.ggml_dup(ctx.(*Context).ctx, t.t),
|
||||
}
|
||||
}
|
||||
|
@ -3083,6 +3083,13 @@ kernel void kernel_cos(
|
||||
dst[tpig] = cos(src0[tpig]);
|
||||
}
|
||||
|
||||
kernel void kernel_neg(
|
||||
device const float * src0,
|
||||
device float * dst,
|
||||
uint tpig[[thread_position_in_grid]]) {
|
||||
dst[tpig] = -src0[tpig];
|
||||
}
|
||||
|
||||
kernel void kernel_sum_rows(
|
||||
device const float * src0,
|
||||
device float * dst,
|
||||
|
15
ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m
vendored
15
ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m
vendored
@ -423,6 +423,7 @@ enum ggml_metal_kernel_type {
|
||||
GGML_METAL_KERNEL_TYPE_SQRT,
|
||||
GGML_METAL_KERNEL_TYPE_SIN,
|
||||
GGML_METAL_KERNEL_TYPE_COS,
|
||||
GGML_METAL_KERNEL_TYPE_NEG,
|
||||
GGML_METAL_KERNEL_TYPE_SUM_ROWS,
|
||||
GGML_METAL_KERNEL_TYPE_POOL_2D_AVG_F32,
|
||||
GGML_METAL_KERNEL_TYPE_POOL_2D_MAX_F32,
|
||||
@ -1039,6 +1040,7 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SQRT, sqrt, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SIN, sin, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_COS, cos, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_NEG, neg, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SUM_ROWS, sum_rows, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ARGMAX, argmax, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_POOL_2D_AVG_F32, pool_2d_avg_f32, true);
|
||||
@ -1202,6 +1204,7 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex
|
||||
case GGML_UNARY_OP_GELU_QUICK:
|
||||
case GGML_UNARY_OP_SILU:
|
||||
case GGML_UNARY_OP_ELU:
|
||||
case GGML_UNARY_OP_NEG:
|
||||
return ggml_is_contiguous(op->src[0]);
|
||||
default:
|
||||
return false;
|
||||
@ -1873,6 +1876,18 @@ static void ggml_metal_encode_node(
|
||||
|
||||
[encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
|
||||
} break;
|
||||
case GGML_UNARY_OP_NEG:
|
||||
{
|
||||
id<MTLComputePipelineState> pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_NEG].pipeline;
|
||||
|
||||
[encoder setComputePipelineState:pipeline];
|
||||
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
|
||||
[encoder setBuffer:id_dst offset:offs_dst atIndex:1];
|
||||
|
||||
const int64_t n = ggml_nelements(dst);
|
||||
|
||||
[encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
|
||||
} break;
|
||||
default:
|
||||
{
|
||||
GGML_LOG_WARN("%s: node %3d, op = %8s not implemented\n", __func__, idx, ggml_op_name(dst->op));
|
||||
|
@ -945,6 +945,13 @@ kernel void kernel_cos(
|
||||
dst[tpig] = cos(src0[tpig]);
|
||||
}
|
||||
|
||||
kernel void kernel_neg(
|
||||
device const float * src0,
|
||||
device float * dst,
|
||||
uint tpig[[thread_position_in_grid]]) {
|
||||
dst[tpig] = -src0[tpig];
|
||||
}
|
||||
|
||||
kernel void kernel_sum_rows(
|
||||
device const float * src0,
|
||||
device float * dst,
|
||||
|
@ -11,7 +11,7 @@ import (
|
||||
"github.com/ollama/ollama/model/input"
|
||||
)
|
||||
|
||||
type TextOptions struct {
|
||||
type TextConfig struct {
|
||||
hiddenSize, numHeads, numKVHeads int
|
||||
attnKeyLen, attnValLen int
|
||||
eps, ropeScale float32
|
||||
@ -28,7 +28,7 @@ type TextModel struct {
|
||||
OutputNorm *nn.RMSNorm `gguf:"output_norm"`
|
||||
Output *nn.Linear `gguf:"output,alt:token_embd"`
|
||||
|
||||
*TextOptions
|
||||
*TextConfig
|
||||
}
|
||||
|
||||
const (
|
||||
@ -55,7 +55,7 @@ func newTextModel(c fs.Config) *TextModel {
|
||||
},
|
||||
),
|
||||
Layers: make([]TextLayer, numBlocks),
|
||||
TextOptions: &TextOptions{
|
||||
TextConfig: &TextConfig{
|
||||
hiddenSize: int(c.Uint("embedding_length")),
|
||||
numHeads: int(c.Uint("attention.head_count")),
|
||||
numKVHeads: int(c.Uint("attention.head_count_kv")),
|
||||
@ -84,7 +84,7 @@ type TextSelfAttention struct {
|
||||
Output *nn.Linear `gguf:"attn_output"`
|
||||
}
|
||||
|
||||
func (sa *TextSelfAttention) Forward(ctx ml.Context, layer int, hiddenState, positionIDs ml.Tensor, cache kvcache.Cache, opts *TextOptions) ml.Tensor {
|
||||
func (sa *TextSelfAttention) Forward(ctx ml.Context, layer int, hiddenState, positionIDs ml.Tensor, cache kvcache.Cache, opts *TextConfig) ml.Tensor {
|
||||
batchSize := hiddenState.Dim(1)
|
||||
ropeType := uint32(2)
|
||||
|
||||
@ -120,12 +120,12 @@ func (sa *TextSelfAttention) Forward(ctx ml.Context, layer int, hiddenState, pos
|
||||
}
|
||||
|
||||
func (m *TextModel) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
|
||||
ropeBase := m.TextOptions.ropeLocalBase
|
||||
ropeBase := m.TextConfig.ropeLocalBase
|
||||
if (layer+1)%gemmaGlobalCacheCount == 0 {
|
||||
ropeBase = m.TextOptions.ropeGlobalBase
|
||||
ropeBase = m.TextConfig.ropeGlobalBase
|
||||
}
|
||||
|
||||
return key.RoPE(ctx, shift, nil, uint32(m.TextOptions.attnKeyLen), uint32(2), ropeBase, m.TextOptions.ropeScale), nil
|
||||
return key.RoPE(ctx, shift, nil, uint32(m.TextConfig.attnKeyLen), uint32(2), ropeBase, m.TextConfig.ropeScale), nil
|
||||
}
|
||||
|
||||
type TextMLP struct {
|
||||
@ -134,7 +134,7 @@ type TextMLP struct {
|
||||
Gate *nn.Linear `gguf:"ffn_gate"`
|
||||
}
|
||||
|
||||
func (mlp *TextMLP) Forward(ctx ml.Context, hiddenState ml.Tensor, opts *TextOptions) ml.Tensor {
|
||||
func (mlp *TextMLP) Forward(ctx ml.Context, hiddenState ml.Tensor, opts *TextConfig) ml.Tensor {
|
||||
hiddenState = mlp.Gate.Forward(ctx, hiddenState).GELU(ctx).Mul(ctx, mlp.Up.Forward(ctx, hiddenState))
|
||||
return mlp.Down.Forward(ctx, hiddenState)
|
||||
}
|
||||
@ -148,7 +148,7 @@ type TextLayer struct {
|
||||
PostMLPNorm *nn.RMSNorm `gguf:"post_ffw_norm"`
|
||||
}
|
||||
|
||||
func (l *TextLayer) Forward(ctx ml.Context, layer int, hiddenState, positionIDs, outputs ml.Tensor, cache kvcache.Cache, opts *TextOptions) ml.Tensor {
|
||||
func (l *TextLayer) Forward(ctx ml.Context, layer int, hiddenState, positionIDs, outputs ml.Tensor, cache kvcache.Cache, opts *TextConfig) ml.Tensor {
|
||||
residual := hiddenState
|
||||
|
||||
hiddenState = l.AttentionNorm.Forward(ctx, hiddenState, opts.eps)
|
||||
@ -173,7 +173,7 @@ func (l *TextLayer) Forward(ctx ml.Context, layer int, hiddenState, positionIDs,
|
||||
|
||||
func (m *TextModel) Forward(ctx ml.Context, inputs, positions, outputs ml.Tensor, batch input.Batch, cache kvcache.Cache) ml.Tensor {
|
||||
hiddenState := m.TokenEmbedding.Forward(ctx, inputs)
|
||||
hiddenState = hiddenState.Scale(ctx, math.Sqrt(float64(m.TextOptions.hiddenSize)))
|
||||
hiddenState = hiddenState.Scale(ctx, math.Sqrt(float64(m.TextConfig.hiddenSize)))
|
||||
|
||||
// set image embeddings
|
||||
var except []int
|
||||
@ -206,7 +206,7 @@ func (m *TextModel) Forward(ctx ml.Context, inputs, positions, outputs ml.Tensor
|
||||
lastLayerOutputs = outputs
|
||||
}
|
||||
|
||||
hiddenState = layer.Forward(ctx, i, hiddenState, positions, lastLayerOutputs, cache, m.TextOptions)
|
||||
hiddenState = layer.Forward(ctx, i, hiddenState, positions, lastLayerOutputs, cache, m.TextConfig)
|
||||
}
|
||||
|
||||
hiddenState = m.OutputNorm.Forward(ctx, hiddenState, m.eps)
|
||||
|
56
model/models/mistral3/imageproc.go
Normal file
56
model/models/mistral3/imageproc.go
Normal file
@ -0,0 +1,56 @@
|
||||
package mistral3
|
||||
|
||||
import (
|
||||
"image"
|
||||
_ "image/jpeg"
|
||||
_ "image/png"
|
||||
"math"
|
||||
|
||||
"github.com/ollama/ollama/fs"
|
||||
"github.com/ollama/ollama/model/imageproc"
|
||||
)
|
||||
|
||||
type ImageProcessor struct {
|
||||
imageSize int
|
||||
patchSize int
|
||||
numChannels int
|
||||
longestEdge int
|
||||
}
|
||||
|
||||
func newImageProcessor(c fs.Config) ImageProcessor {
|
||||
return ImageProcessor{
|
||||
imageSize: int(c.Uint("vision.image_size", 1540)),
|
||||
patchSize: int(c.Uint("vision.patch_size", 14)),
|
||||
numChannels: int(c.Uint("vision.num_channels", 3)),
|
||||
longestEdge: int(c.Uint("vision.longest_edge", 1540)),
|
||||
}
|
||||
}
|
||||
|
||||
// ProcessImage prepares an image for the vision model by:
|
||||
// 1. Compositing transparent images
|
||||
// 2. Resizing to fit model constraints while preserving aspect ratio
|
||||
// 3. Normalizing pixel values
|
||||
// Returns normalized image data and the final size in pixels
|
||||
func (p *ImageProcessor) ProcessImage(img image.Image) ([]float32, image.Point, error) {
|
||||
img = imageproc.Composite(img)
|
||||
|
||||
size := img.Bounds().Size()
|
||||
ratio := max(float64(size.Y)/float64(p.longestEdge), float64(size.X)/float64(p.longestEdge))
|
||||
if ratio > 1.0 {
|
||||
size = image.Point{
|
||||
int(math.Floor(float64(size.X) / ratio)),
|
||||
int(math.Floor(float64(size.Y) / ratio)),
|
||||
}
|
||||
}
|
||||
|
||||
patchesX := (size.X-1)/p.patchSize + 1
|
||||
patchesY := (size.Y-1)/p.patchSize + 1
|
||||
size = image.Point{
|
||||
patchesX * p.patchSize,
|
||||
patchesY * p.patchSize,
|
||||
}
|
||||
|
||||
img = imageproc.Resize(img, size, imageproc.ResizeBilinear)
|
||||
data := imageproc.Normalize(img, imageproc.ClipDefaultMean, imageproc.ClipDefaultSTD, true, true)
|
||||
return data, size, nil
|
||||
}
|
189
model/models/mistral3/model.go
Normal file
189
model/models/mistral3/model.go
Normal file
@ -0,0 +1,189 @@
|
||||
package mistral3
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"image"
|
||||
"slices"
|
||||
"sync"
|
||||
|
||||
"github.com/ollama/ollama/fs"
|
||||
"github.com/ollama/ollama/kvcache"
|
||||
"github.com/ollama/ollama/ml"
|
||||
"github.com/ollama/ollama/ml/nn"
|
||||
"github.com/ollama/ollama/model"
|
||||
"github.com/ollama/ollama/model/input"
|
||||
)
|
||||
|
||||
type Model struct {
|
||||
model.Base
|
||||
*TextModel
|
||||
*VisionModel `gguf:"v,vision"`
|
||||
*MultiModalProjector `gguf:"mm"`
|
||||
|
||||
ImageProcessor
|
||||
}
|
||||
|
||||
// Implement MultimodalProcessor interface
|
||||
var _ model.MultimodalProcessor = (*Model)(nil)
|
||||
|
||||
func New(c fs.Config) (model.Model, error) {
|
||||
textModel, err := NewTextModel(c)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
m := &Model{
|
||||
TextModel: textModel,
|
||||
VisionModel: newVisionModel(c),
|
||||
ImageProcessor: newImageProcessor(c),
|
||||
MultiModalProjector: newMultiModalProjector(c),
|
||||
}
|
||||
|
||||
m.Cache = kvcache.NewCausalCache(m.TextModel.Shift)
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
type PatchMerger struct {
|
||||
MergingLayer *nn.Linear `gguf:"merging_layer"`
|
||||
}
|
||||
|
||||
func (pm *PatchMerger) Forward(ctx ml.Context, visionOutputs ml.Tensor, size image.Point, spatialMergeSize int) ml.Tensor {
|
||||
d := visionOutputs.Dim(0)
|
||||
imageGrid := visionOutputs.Permute(ctx, 1, 0, 2, 3).Contiguous(ctx).Reshape(ctx, size.X, size.Y, d)
|
||||
kernel := ctx.Input().Empty(ml.DTypeF32, spatialMergeSize, spatialMergeSize, d)
|
||||
patches := kernel.IM2Col(ctx, imageGrid, spatialMergeSize, spatialMergeSize, 0, 0, 1, 1)
|
||||
reshaped := patches.Reshape(ctx, d*spatialMergeSize*spatialMergeSize, patches.Dim(1)*patches.Dim(2))
|
||||
return pm.MergingLayer.Forward(ctx, reshaped)
|
||||
}
|
||||
|
||||
type MultiModalProjector struct {
|
||||
Norm *nn.RMSNorm `gguf:"norm"`
|
||||
Linear1 *nn.Linear `gguf:"linear_1"`
|
||||
Linear2 *nn.Linear `gguf:"linear_2"`
|
||||
PatchMerger *PatchMerger `gguf:"patch_merger"`
|
||||
|
||||
spatialMergeSize int
|
||||
eps float32
|
||||
patchSize int
|
||||
}
|
||||
|
||||
func (p *MultiModalProjector) Forward(ctx ml.Context, visionOutputs ml.Tensor, size image.Point) (ml.Tensor, image.Point) {
|
||||
visionOutputs = p.Norm.Forward(ctx, visionOutputs, p.eps)
|
||||
patchSizes := image.Point{size.X / p.patchSize, size.Y / p.patchSize}
|
||||
visionOutputs = p.PatchMerger.Forward(ctx, visionOutputs, patchSizes, p.spatialMergeSize)
|
||||
visionOutputs = p.Linear1.Forward(ctx, visionOutputs)
|
||||
visionOutputs = visionOutputs.GELU(ctx)
|
||||
return p.Linear2.Forward(ctx, visionOutputs), image.Point{patchSizes.X / p.spatialMergeSize, patchSizes.Y / p.spatialMergeSize}
|
||||
}
|
||||
|
||||
func newMultiModalProjector(c fs.Config) *MultiModalProjector {
|
||||
return &MultiModalProjector{
|
||||
spatialMergeSize: int(c.Uint("spatial_merge_size", 2)),
|
||||
eps: c.Float("text_config.rms_norm_eps", 1e-5),
|
||||
patchSize: int(c.Uint("vision.patch_size", 14)),
|
||||
}
|
||||
}
|
||||
|
||||
func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) (any, error) {
|
||||
if len(m.VisionModel.Layers) == 0 {
|
||||
return nil, model.ErrNoVisionModel
|
||||
}
|
||||
|
||||
image, _, err := image.Decode(bytes.NewReader(multimodalData))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
f32s, size, err := m.ImageProcessor.ProcessImage(image)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
pixelValues, err := ctx.Input().FromFloatSlice(f32s, size.X, size.Y, m.ImageProcessor.numChannels)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
visionOutputs := m.VisionModel.Forward(ctx, pixelValues)
|
||||
features, size := m.MultiModalProjector.Forward(ctx, visionOutputs, size)
|
||||
|
||||
// split into patches to be sent to the text transformer
|
||||
parent := imageFeatures{tensor: features}
|
||||
rows := make([]*imageRow, size.Y)
|
||||
for i := range rows {
|
||||
rows[i] = &imageRow{parent: &parent, s: i, shape: []int{features.Dim(0), size.X}}
|
||||
}
|
||||
|
||||
return rows, nil
|
||||
}
|
||||
|
||||
type imageFeatures struct {
|
||||
tensor ml.Tensor
|
||||
|
||||
dataOnce sync.Once
|
||||
data []float32
|
||||
}
|
||||
|
||||
type imageRow struct {
|
||||
parent *imageFeatures
|
||||
s int
|
||||
shape []int
|
||||
}
|
||||
|
||||
func (r *imageRow) data() []float32 {
|
||||
n := 1
|
||||
for _, s := range r.shape {
|
||||
n *= s
|
||||
}
|
||||
|
||||
return r.parent.data[r.s*n : (r.s+1)*n]
|
||||
}
|
||||
|
||||
// PostTokenize arranges Mistral 3's inputs for the forward pass
|
||||
// In Mistral 3 and Pixtral, the input patches are arranged as follows:
|
||||
// [IMG]...[IMG][IMG_BREAK][IMG]...[IMG][IMG_BREAK][IMG]...[IMG][IMG_END]
|
||||
// Each sequence of [IMG]...[IMG] is a set of patches of vision embeddings
|
||||
// that can be processed together.
|
||||
func (m *Model) PostTokenize(inputs []input.Input) ([]input.Input, error) {
|
||||
var result []input.Input
|
||||
for _, inp := range inputs {
|
||||
if inp.Multimodal == nil {
|
||||
result = append(result, inp)
|
||||
} else {
|
||||
inputMultimodal := inp.Multimodal.([]*imageRow)
|
||||
for i, row := range inputMultimodal {
|
||||
// [IMG]
|
||||
result = append(result, input.Input{Token: 10, Multimodal: row, MultimodalHash: inp.MultimodalHash, SameBatch: row.shape[1]})
|
||||
result = append(result, slices.Repeat([]input.Input{{Token: 10}}, row.shape[1]-1)...)
|
||||
if i == len(inputMultimodal)-1 {
|
||||
// [IMG_END]
|
||||
result = append(result, input.Input{Token: 13})
|
||||
} else {
|
||||
// [IMG_BREAK]
|
||||
result = append(result, input.Input{Token: 12})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
|
||||
positions, err := ctx.Input().FromIntSlice(batch.Positions, len(batch.Positions))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
outputs, err := ctx.Input().FromIntSlice(batch.Outputs, len(batch.Outputs))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return m.TextModel.Forward(ctx, batch.Inputs, positions, outputs, batch, m.Cache), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
model.Register("mistral3", New)
|
||||
}
|
177
model/models/mistral3/model_text.go
Normal file
177
model/models/mistral3/model_text.go
Normal file
@ -0,0 +1,177 @@
|
||||
package mistral3
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/fs"
|
||||
"github.com/ollama/ollama/kvcache"
|
||||
"github.com/ollama/ollama/ml"
|
||||
"github.com/ollama/ollama/ml/nn"
|
||||
"github.com/ollama/ollama/model"
|
||||
"github.com/ollama/ollama/model/input"
|
||||
)
|
||||
|
||||
type TextOptions struct {
|
||||
hiddenSize, numHeads, numKVHeads, headDim int
|
||||
eps, ropeBase, ropeScale float32
|
||||
ropeDim uint32
|
||||
}
|
||||
|
||||
type TextModel struct {
|
||||
model.Base
|
||||
model.BytePairEncoding
|
||||
|
||||
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
|
||||
Layers []Layer `gguf:"blk"`
|
||||
OutputNorm *nn.RMSNorm `gguf:"output_norm"`
|
||||
Output *nn.Linear `gguf:"output,alt:token_embd"`
|
||||
|
||||
*TextOptions
|
||||
}
|
||||
|
||||
type SelfAttention struct {
|
||||
Query *nn.Linear `gguf:"attn_q"`
|
||||
Key *nn.Linear `gguf:"attn_k"`
|
||||
Value *nn.Linear `gguf:"attn_v"`
|
||||
Output *nn.Linear `gguf:"attn_output"`
|
||||
}
|
||||
|
||||
func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positionIDs ml.Tensor, cache kvcache.Cache, opts *TextOptions) ml.Tensor {
|
||||
batchSize := hiddenState.Dim(1)
|
||||
ropeType := uint32(0)
|
||||
headDim := opts.headDim
|
||||
if headDim == 0 {
|
||||
headDim = opts.hiddenSize / opts.numHeads
|
||||
}
|
||||
|
||||
q := sa.Query.Forward(ctx, hiddenState)
|
||||
q = q.Reshape(ctx, headDim, opts.numHeads, batchSize)
|
||||
q = q.RoPE(ctx, positionIDs, nil, opts.ropeDim, ropeType, opts.ropeBase, opts.ropeScale)
|
||||
|
||||
k := sa.Key.Forward(ctx, hiddenState)
|
||||
k = k.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
|
||||
k = k.RoPE(ctx, positionIDs, nil, opts.ropeDim, ropeType, opts.ropeBase, opts.ropeScale)
|
||||
|
||||
v := sa.Value.Forward(ctx, hiddenState)
|
||||
v = v.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
|
||||
|
||||
kqv := nn.Attention(ctx, q, k, v, 1.0/math.Sqrt(float64(headDim)), cache)
|
||||
kqv = kqv.Reshape(ctx, headDim*opts.numHeads, batchSize)
|
||||
return sa.Output.Forward(ctx, kqv)
|
||||
}
|
||||
|
||||
func (m *TextModel) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
|
||||
return key.RoPE(ctx, shift, nil, uint32(0), m.ropeDim, m.ropeBase, m.ropeScale), nil
|
||||
}
|
||||
|
||||
type MLP struct {
|
||||
Up *nn.Linear `gguf:"ffn_up"`
|
||||
Down *nn.Linear `gguf:"ffn_down"`
|
||||
Gate *nn.Linear `gguf:"ffn_gate"`
|
||||
}
|
||||
|
||||
func (mlp *MLP) Forward(ctx ml.Context, hiddenState ml.Tensor, opts *TextOptions) ml.Tensor {
|
||||
hiddenState = mlp.Gate.Forward(ctx, hiddenState).SILU(ctx).Mul(ctx, mlp.Up.Forward(ctx, hiddenState))
|
||||
return mlp.Down.Forward(ctx, hiddenState)
|
||||
}
|
||||
|
||||
type Layer struct {
|
||||
AttentionNorm *nn.RMSNorm `gguf:"attn_norm"`
|
||||
SelfAttention *SelfAttention
|
||||
MLPNorm *nn.RMSNorm `gguf:"ffn_norm"`
|
||||
MLP *MLP
|
||||
}
|
||||
|
||||
func (l *Layer) Forward(ctx ml.Context, hiddenState, positionIDs, outputs ml.Tensor, cache kvcache.Cache, opts *TextOptions) ml.Tensor {
|
||||
residual := hiddenState
|
||||
|
||||
hiddenState = l.AttentionNorm.Forward(ctx, hiddenState, opts.eps)
|
||||
hiddenState = l.SelfAttention.Forward(ctx, hiddenState, positionIDs, cache, opts)
|
||||
|
||||
// In the final layer (outputs != nil), optimize by pruning to just the token positions
|
||||
// we need logits for.
|
||||
if outputs != nil {
|
||||
hiddenState = hiddenState.Rows(ctx, outputs)
|
||||
residual = residual.Rows(ctx, outputs)
|
||||
}
|
||||
|
||||
hiddenState = hiddenState.Add(ctx, residual)
|
||||
residual = hiddenState
|
||||
|
||||
hiddenState = l.MLPNorm.Forward(ctx, hiddenState, opts.eps)
|
||||
hiddenState = l.MLP.Forward(ctx, hiddenState, opts)
|
||||
return hiddenState.Add(ctx, residual)
|
||||
}
|
||||
|
||||
func (m *TextModel) Forward(ctx ml.Context, inputs, positions, outputs ml.Tensor, batch input.Batch, cache kvcache.Cache) ml.Tensor {
|
||||
hiddenState := m.TokenEmbedding.Forward(ctx, inputs).Duplicate(ctx)
|
||||
|
||||
// image embeddings
|
||||
for _, image := range batch.Multimodal {
|
||||
row := image.Multimodal.(*imageRow)
|
||||
row.parent.dataOnce.Do(func() {
|
||||
// use a new, throwaway context so the image tensor is not added to the graph
|
||||
temp := m.Backend().NewContext()
|
||||
temp.Forward(row.parent.tensor).Compute(row.parent.tensor)
|
||||
row.parent.data = row.parent.tensor.Floats()
|
||||
temp.Close()
|
||||
})
|
||||
|
||||
imageFeature, err := ctx.Input().FromFloatSlice(row.data(), row.shape...)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
ctx.Forward(imageFeature.Copy(ctx, hiddenState.View(ctx, image.Index*hiddenState.Stride(1), imageFeature.Dim(0)*imageFeature.Dim(1))))
|
||||
}
|
||||
|
||||
for i, layer := range m.Layers {
|
||||
cache.SetLayer(i)
|
||||
|
||||
var lastLayerOutputs ml.Tensor
|
||||
if i == len(m.Layers)-1 {
|
||||
lastLayerOutputs = outputs
|
||||
}
|
||||
|
||||
hiddenState = layer.Forward(ctx, hiddenState, positions, lastLayerOutputs, cache, m.TextOptions)
|
||||
}
|
||||
|
||||
hiddenState = m.OutputNorm.Forward(ctx, hiddenState, m.eps)
|
||||
return m.Output.Forward(ctx, hiddenState)
|
||||
}
|
||||
|
||||
func NewTextModel(c fs.Config) (*TextModel, error) {
|
||||
if !strings.EqualFold(c.String("tokenizer.ggml.model"), "gpt2") {
|
||||
return nil, fmt.Errorf("tokenizer %s not yet supported", c.String("tokenizer.ggml.model"))
|
||||
}
|
||||
|
||||
textModel := &TextModel{
|
||||
BytePairEncoding: model.NewBytePairEncoding(
|
||||
c.String("tokenizer.ggml.pretokenizer", `[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
|
||||
&model.Vocabulary{
|
||||
Values: c.Strings("tokenizer.ggml.tokens"),
|
||||
Types: c.Uints("tokenizer.ggml.token_type"),
|
||||
Merges: c.Strings("tokenizer.ggml.merges"),
|
||||
BOS: int32(c.Uint("tokenizer.ggml.bos_token_id", 1)),
|
||||
AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
|
||||
EOS: int32(c.Uint("tokenizer.ggml.eos_token_id", 2)),
|
||||
AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
|
||||
},
|
||||
),
|
||||
Layers: make([]Layer, c.Uint("block_count")),
|
||||
TextOptions: &TextOptions{
|
||||
hiddenSize: int(c.Uint("embedding_length")),
|
||||
numHeads: int(c.Uint("attention.head_count")),
|
||||
numKVHeads: int(c.Uint("attention.head_count_kv")),
|
||||
headDim: int(c.Uint("attention.key_length")),
|
||||
eps: c.Float("attention.layer_norm_rms_epsilon"),
|
||||
ropeBase: c.Float("rope.freq_base"),
|
||||
ropeScale: c.Float("rope.freq_scale", 1),
|
||||
ropeDim: c.Uint("rope.dimension_count"),
|
||||
},
|
||||
}
|
||||
|
||||
return textModel, nil
|
||||
}
|
186
model/models/mistral3/model_vision.go
Normal file
186
model/models/mistral3/model_vision.go
Normal file
@ -0,0 +1,186 @@
|
||||
package mistral3
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"github.com/ollama/ollama/fs"
|
||||
"github.com/ollama/ollama/ml"
|
||||
"github.com/ollama/ollama/ml/nn"
|
||||
)
|
||||
|
||||
var batchSize int = 1
|
||||
|
||||
func rotateHalf(ctx ml.Context, t ml.Tensor) ml.Tensor {
|
||||
x1 := t.View(ctx, 0, t.Dim(0)/2, t.Stride(1), t.Dim(1), t.Stride(2), t.Dim(2), t.Stride(3), t.Dim(3))
|
||||
x2 := t.View(ctx, t.Stride(0)*t.Dim(0)/2, t.Dim(0)/2, t.Stride(1), t.Dim(1), t.Stride(2), t.Dim(2), t.Stride(3), t.Dim(3)).Contiguous(ctx)
|
||||
return x2.Neg(ctx).Concat(ctx, x1, 0)
|
||||
}
|
||||
|
||||
func applyRotaryPositionalEmbedding(ctx ml.Context, t, cos, sin ml.Tensor) ml.Tensor {
|
||||
return t.Mul(ctx, cos).Add(ctx, rotateHalf(ctx, t).Mul(ctx, sin))
|
||||
}
|
||||
|
||||
type VisionSelfAttention struct {
|
||||
Query *nn.Linear `gguf:"attn_q"`
|
||||
Key *nn.Linear `gguf:"attn_k"`
|
||||
Value *nn.Linear `gguf:"attn_v"`
|
||||
Output *nn.Linear `gguf:"attn_output"`
|
||||
}
|
||||
|
||||
func (sa *VisionSelfAttention) Forward(ctx ml.Context, hiddenStates, cos, sin ml.Tensor, opts *VisionModelOptions) ml.Tensor {
|
||||
query := sa.Query.Forward(ctx, hiddenStates)
|
||||
key := sa.Key.Forward(ctx, hiddenStates)
|
||||
value := sa.Value.Forward(ctx, hiddenStates)
|
||||
|
||||
query = query.Reshape(ctx, opts.headDim, opts.numHeads, query.Dim(1), batchSize)
|
||||
key = key.Reshape(ctx, opts.headDim, opts.numHeads, key.Dim(1), batchSize)
|
||||
value = value.Reshape(ctx, opts.headDim, opts.numHeads, value.Dim(1), batchSize)
|
||||
|
||||
query = applyRotaryPositionalEmbedding(ctx, query, cos, sin)
|
||||
key = applyRotaryPositionalEmbedding(ctx, key, cos, sin)
|
||||
|
||||
attention := nn.Attention(ctx, query, key, value, 1./math.Sqrt(float64(opts.headDim)), nil)
|
||||
attention = attention.Reshape(ctx, opts.hiddenSize, attention.Dim(2), batchSize)
|
||||
return sa.Output.Forward(ctx, attention)
|
||||
}
|
||||
|
||||
type VisionMLP struct {
|
||||
Gate *nn.Linear `gguf:"ffn_gate"`
|
||||
Up *nn.Linear `gguf:"ffn_up"`
|
||||
Down *nn.Linear `gguf:"ffn_down"`
|
||||
}
|
||||
|
||||
func (mlp *VisionMLP) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts *VisionModelOptions) ml.Tensor {
|
||||
hiddenStates = mlp.Gate.Forward(ctx, hiddenStates).SILU(ctx).Mul(ctx, mlp.Up.Forward(ctx, hiddenStates))
|
||||
return mlp.Down.Forward(ctx, hiddenStates)
|
||||
}
|
||||
|
||||
type VisionEncoderLayer struct {
|
||||
AttentionNorm *nn.RMSNorm `gguf:"attn_norm"`
|
||||
SelfAttention *VisionSelfAttention
|
||||
FFNNorm *nn.RMSNorm `gguf:"ffn_norm"`
|
||||
MLP *VisionMLP
|
||||
}
|
||||
|
||||
func (e *VisionEncoderLayer) Forward(ctx ml.Context, hiddenStates, cos, sin ml.Tensor, opts *VisionModelOptions) ml.Tensor {
|
||||
residual := hiddenStates
|
||||
hiddenStates = e.AttentionNorm.Forward(ctx, hiddenStates, opts.eps)
|
||||
hiddenStates = e.SelfAttention.Forward(ctx, hiddenStates, cos, sin, opts)
|
||||
hiddenStates = hiddenStates.Add(ctx, residual)
|
||||
|
||||
residual = hiddenStates
|
||||
hiddenStates = e.FFNNorm.Forward(ctx, hiddenStates, opts.eps)
|
||||
hiddenStates = e.MLP.Forward(ctx, hiddenStates, opts)
|
||||
return hiddenStates.Add(ctx, residual)
|
||||
}
|
||||
|
||||
type VisionModelOptions struct {
|
||||
hiddenSize int
|
||||
numHeads int
|
||||
headDim int
|
||||
intermediateSize int
|
||||
imageSize int
|
||||
patchSize int
|
||||
numChannels int
|
||||
eps float32
|
||||
ropeBase float32
|
||||
}
|
||||
|
||||
type VisionModel struct {
|
||||
PatchEmbedding *nn.Conv2D `gguf:"patch_conv"`
|
||||
EncoderNorm *nn.RMSNorm `gguf:"encoder_norm"`
|
||||
Layers []VisionEncoderLayer `gguf:"blk"`
|
||||
|
||||
*VisionModelOptions
|
||||
}
|
||||
|
||||
func (m *VisionModel) positionalEmbedding(ctx ml.Context, positionIDs ml.Tensor) ml.Tensor {
|
||||
maxPatchesPerSide := m.imageSize / m.patchSize
|
||||
frequencies := m.headDim / 2
|
||||
frequenciesHeight := make([]float32, frequencies/2*maxPatchesPerSide)
|
||||
frequenciesWidth := make([]float32, frequencies/2*maxPatchesPerSide)
|
||||
for i := range frequencies {
|
||||
for j := range maxPatchesPerSide {
|
||||
frequency := float32(j) / float32(math.Pow(float64(m.ropeBase), float64(i)*2/float64(m.headDim)))
|
||||
if i%2 == 0 {
|
||||
frequenciesHeight[i/2*maxPatchesPerSide+j] = frequency
|
||||
} else {
|
||||
frequenciesWidth[i/2*maxPatchesPerSide+j] = frequency
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
h, err := ctx.Input().FromFloatSlice(frequenciesHeight, maxPatchesPerSide, frequencies/2)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
w, err := ctx.Input().FromFloatSlice(frequenciesWidth, maxPatchesPerSide, frequencies/2)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
h = h.Permute(ctx, 1, 0, 2, 3).Contiguous(ctx)
|
||||
w = w.Permute(ctx, 1, 0, 2, 3).Contiguous(ctx)
|
||||
|
||||
h = h.Repeat(ctx, 1, maxPatchesPerSide)
|
||||
h = h.Reshape(ctx, frequencies/2, maxPatchesPerSide, maxPatchesPerSide).Permute(ctx, 0, 2, 1, 3).Contiguous(ctx)
|
||||
w = w.Repeat(ctx, 2, maxPatchesPerSide)
|
||||
|
||||
inverseFrequencies := h.Concat(ctx, w, 0).Reshape(ctx, frequencies, maxPatchesPerSide*maxPatchesPerSide)
|
||||
inverseFrequencies = inverseFrequencies.Concat(ctx, inverseFrequencies, 0)
|
||||
return inverseFrequencies.Rows(ctx, positionIDs)
|
||||
}
|
||||
|
||||
func (m *VisionModel) Forward(ctx ml.Context, pixelValues ml.Tensor) ml.Tensor {
|
||||
numPatchesW := pixelValues.Dim(0) / m.patchSize
|
||||
numPatchesH := pixelValues.Dim(1) / m.patchSize
|
||||
numPatches := numPatchesW * numPatchesH
|
||||
|
||||
hiddenStates := m.PatchEmbedding.Forward(ctx, pixelValues, m.patchSize, m.patchSize, 0, 0, 1, 1)
|
||||
hiddenStates = hiddenStates.Reshape(ctx, numPatches, m.hiddenSize)
|
||||
hiddenStates = hiddenStates.Permute(ctx, 1, 0, 2, 3).Contiguous(ctx)
|
||||
hiddenStates = m.EncoderNorm.Forward(ctx, hiddenStates, m.VisionModelOptions.eps)
|
||||
|
||||
// Prepare position IDs for 2D rope
|
||||
positions := make([]int32, numPatches)
|
||||
for h := range numPatchesH {
|
||||
for w := range numPatchesW {
|
||||
idx := h*numPatchesW + w
|
||||
positions[idx] = int32(h*m.imageSize/m.patchSize + w)
|
||||
}
|
||||
}
|
||||
|
||||
positionIDs, err := ctx.Input().FromIntSlice(positions, len(positions))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
positionEmbedding := m.positionalEmbedding(ctx, positionIDs)
|
||||
cos, sin := positionEmbedding.Cos(ctx), positionEmbedding.Sin(ctx)
|
||||
cos = cos.Reshape(ctx, cos.Dim(0), 1, cos.Dim(1))
|
||||
sin = sin.Reshape(ctx, sin.Dim(0), 1, sin.Dim(1))
|
||||
|
||||
for _, layer := range m.Layers {
|
||||
hiddenStates = layer.Forward(ctx, hiddenStates, cos, sin, m.VisionModelOptions)
|
||||
}
|
||||
|
||||
return hiddenStates
|
||||
}
|
||||
|
||||
func newVisionModel(c fs.Config) *VisionModel {
|
||||
return &VisionModel{
|
||||
Layers: make([]VisionEncoderLayer, c.Uint("vision.block_count", 24)),
|
||||
VisionModelOptions: &VisionModelOptions{
|
||||
hiddenSize: int(c.Uint("vision.embedding_length", 1024)),
|
||||
numHeads: int(c.Uint("vision.attention.head_count", 16)),
|
||||
headDim: int(c.Uint("vision.attention.key_length", 64)),
|
||||
intermediateSize: int(c.Uint("vision.feed_forward_length", 4096)),
|
||||
imageSize: int(c.Uint("vision.image_size", 1540)),
|
||||
patchSize: int(c.Uint("vision.patch_size", 14)),
|
||||
numChannels: int(c.Uint("vision.num_channels", 3)),
|
||||
eps: c.Float("vision.attention.layer_norm_epsilon", 1e-5),
|
||||
ropeBase: c.Float("vision.rope.freq_base", 10000.0),
|
||||
},
|
||||
}
|
||||
}
|
@ -186,7 +186,7 @@ func (m *VisionModel) Forward(ctx ml.Context, pixelValues, positionIDs, aspectRa
|
||||
hiddenState = hiddenState.Permute(ctx, 1, 0, 2, 3).Contiguous(ctx)
|
||||
|
||||
hiddenState = m.PreTilePositionEmbedding.Forward(ctx, hiddenState, aspectRatioIDs, m.VisionModelOptions)
|
||||
hiddenState = m.ClassEmbedding.Stack(ctx, 2, slices.Repeat([]ml.Tensor{m.ClassEmbedding}, m.numTiles-1)...).Concat(ctx, hiddenState, 1)
|
||||
hiddenState = m.ClassEmbedding.Repeat(ctx, 2, m.numTiles).Concat(ctx, hiddenState, 1)
|
||||
|
||||
hiddenState = m.PositionEmbedding.Forward(ctx, hiddenState, positionIDs, aspectRatioIDs, numPositions, m.VisionModelOptions)
|
||||
hiddenState = m.PreLayerNorm.Forward(ctx, hiddenState, m.eps)
|
||||
|
@ -4,5 +4,6 @@ import (
|
||||
_ "github.com/ollama/ollama/model/models/gemma2"
|
||||
_ "github.com/ollama/ollama/model/models/gemma3"
|
||||
_ "github.com/ollama/ollama/model/models/llama"
|
||||
_ "github.com/ollama/ollama/model/models/mistral3"
|
||||
_ "github.com/ollama/ollama/model/models/mllama"
|
||||
)
|
||||
|
@ -1,68 +0,0 @@
|
||||
package pixtral
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"image"
|
||||
_ "image/jpeg"
|
||||
_ "image/png"
|
||||
"io"
|
||||
"math"
|
||||
|
||||
"github.com/ollama/ollama/model/imageproc"
|
||||
)
|
||||
|
||||
func getNumImageTokens(imageSize, patchSize image.Point) image.Point {
|
||||
return image.Point{
|
||||
(imageSize.X-1)/patchSize.X + 1,
|
||||
(imageSize.Y-1)/patchSize.Y + 1,
|
||||
}
|
||||
}
|
||||
|
||||
func getResizeOutputImageSize(img image.Image, longestEdge int, patchSize image.Point) image.Point {
|
||||
b := img.Bounds()
|
||||
le := float64(longestEdge)
|
||||
ratio := math.Max(float64(b.Max.Y)/le, float64(b.Max.X)/le)
|
||||
|
||||
newSize := img.Bounds().Max
|
||||
|
||||
if ratio > 1.0 {
|
||||
newSize = image.Point{
|
||||
int(math.Ceil(float64(b.Max.X) / ratio)),
|
||||
int(math.Ceil(float64(b.Max.Y) / ratio)),
|
||||
}
|
||||
}
|
||||
|
||||
tokens := getNumImageTokens(newSize, patchSize)
|
||||
return image.Point{
|
||||
tokens.X * patchSize.X,
|
||||
tokens.Y * patchSize.Y,
|
||||
}
|
||||
}
|
||||
|
||||
func resizeImage(img image.Image, format string, longestEdge int, patchSize image.Point) image.Image {
|
||||
if format == "png" {
|
||||
img = imageproc.Composite(img)
|
||||
}
|
||||
|
||||
newSize := getResizeOutputImageSize(img, longestEdge, patchSize)
|
||||
|
||||
// todo should be ResizeBicubic, but it doesn't exist
|
||||
return imageproc.Resize(img, newSize, imageproc.ResizeBilinear)
|
||||
}
|
||||
|
||||
func Preprocess(imageData io.Reader) ([]float32, map[string]any, error) {
|
||||
img, format, err := image.Decode(imageData)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("failed to decode image: %w", err)
|
||||
}
|
||||
|
||||
longestEdge := 1024
|
||||
patchSize := image.Point{16, 16}
|
||||
|
||||
img = resizeImage(img, format, longestEdge, patchSize)
|
||||
|
||||
data := imageproc.Normalize(img, imageproc.ClipDefaultMean, imageproc.ClipDefaultSTD, true, true)
|
||||
|
||||
opts := map[string]any{}
|
||||
return data, opts, nil
|
||||
}
|
@ -1,219 +0,0 @@
|
||||
package pixtral
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"image"
|
||||
"image/png"
|
||||
"math"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
)
|
||||
|
||||
func TestGetNumImageTokens(t *testing.T) {
|
||||
type numImageTokensCase struct {
|
||||
ImageSize image.Point
|
||||
PatchSize image.Point
|
||||
Expected image.Point
|
||||
}
|
||||
|
||||
cases := []numImageTokensCase{
|
||||
{
|
||||
ImageSize: image.Point{1024, 764},
|
||||
PatchSize: image.Point{16, 16},
|
||||
Expected: image.Point{64, 48},
|
||||
},
|
||||
{
|
||||
ImageSize: image.Point{800, 600},
|
||||
PatchSize: image.Point{16, 16},
|
||||
Expected: image.Point{50, 38},
|
||||
},
|
||||
{
|
||||
ImageSize: image.Point{640, 480},
|
||||
PatchSize: image.Point{16, 16},
|
||||
Expected: image.Point{40, 30},
|
||||
},
|
||||
{
|
||||
ImageSize: image.Point{320, 200},
|
||||
PatchSize: image.Point{16, 16},
|
||||
Expected: image.Point{20, 13},
|
||||
},
|
||||
{
|
||||
ImageSize: image.Point{1320, 200},
|
||||
PatchSize: image.Point{16, 16},
|
||||
Expected: image.Point{83, 13},
|
||||
},
|
||||
{
|
||||
ImageSize: image.Point{2000, 200},
|
||||
PatchSize: image.Point{16, 16},
|
||||
Expected: image.Point{125, 13},
|
||||
},
|
||||
{
|
||||
ImageSize: image.Point{10000, 200},
|
||||
PatchSize: image.Point{16, 16},
|
||||
Expected: image.Point{625, 13},
|
||||
},
|
||||
{
|
||||
ImageSize: image.Point{1131, 577},
|
||||
PatchSize: image.Point{16, 16},
|
||||
Expected: image.Point{71, 37},
|
||||
},
|
||||
{
|
||||
ImageSize: image.Point{16, 16},
|
||||
PatchSize: image.Point{16, 16},
|
||||
Expected: image.Point{1, 1},
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
actual := getNumImageTokens(c.ImageSize, c.PatchSize)
|
||||
|
||||
if diff := cmp.Diff(actual, c.Expected); diff != "" {
|
||||
t.Errorf("mismatch (-got +want):\n%s", diff)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetResizeOutputImageSize(t *testing.T) {
|
||||
type resizeCase struct {
|
||||
Image image.Image
|
||||
LongestEdge int
|
||||
PatchSize image.Point
|
||||
Expected image.Point
|
||||
}
|
||||
|
||||
cases := []resizeCase{
|
||||
{
|
||||
Image: image.NewRGBA(image.Rect(0, 0, 1024, 768)),
|
||||
LongestEdge: 1024,
|
||||
PatchSize: image.Point{16, 16},
|
||||
Expected: image.Point{1024, 768},
|
||||
},
|
||||
{
|
||||
Image: image.NewRGBA(image.Rect(0, 0, 1162, 690)),
|
||||
LongestEdge: 1024,
|
||||
PatchSize: image.Point{16, 16},
|
||||
Expected: image.Point{1024, 624},
|
||||
},
|
||||
{
|
||||
Image: image.NewRGBA(image.Rect(0, 0, 300, 200)),
|
||||
LongestEdge: 1024,
|
||||
PatchSize: image.Point{16, 16},
|
||||
Expected: image.Point{304, 208},
|
||||
},
|
||||
{
|
||||
Image: image.NewRGBA(image.Rect(0, 0, 1862, 522)),
|
||||
LongestEdge: 1024,
|
||||
PatchSize: image.Point{16, 16},
|
||||
Expected: image.Point{1024, 288},
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
actual := getResizeOutputImageSize(c.Image, c.LongestEdge, c.PatchSize)
|
||||
|
||||
if diff := cmp.Diff(actual, c.Expected); diff != "" {
|
||||
t.Errorf("mismatch (-got +want):\n%s", diff)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestResize(t *testing.T) {
|
||||
type resizeCase struct {
|
||||
Image image.Image
|
||||
LongestEdge int
|
||||
PatchSize image.Point
|
||||
Expected image.Image
|
||||
}
|
||||
|
||||
cases := []resizeCase{
|
||||
{
|
||||
Image: image.NewRGBA(image.Rect(0, 0, 1862, 522)),
|
||||
LongestEdge: 1024,
|
||||
PatchSize: image.Point{16, 16},
|
||||
Expected: image.NewRGBA(image.Rect(0, 0, 1024, 288)),
|
||||
},
|
||||
{
|
||||
Image: image.NewRGBA(image.Rect(0, 0, 10, 10)),
|
||||
LongestEdge: 1024,
|
||||
PatchSize: image.Point{16, 16},
|
||||
Expected: image.NewRGBA(image.Rect(0, 0, 16, 16)),
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
actual := resizeImage(c.Image, "png", c.LongestEdge, c.PatchSize)
|
||||
|
||||
if actual.Bounds() != c.Expected.Bounds() {
|
||||
t.Errorf("image size incorrect: '%#v': expected: '%#v'", actual.Bounds(), c.Expected.Bounds())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestPreprocess(t *testing.T) {
|
||||
type preprocessCase struct {
|
||||
TestImage image.Image
|
||||
ExpectedLen int
|
||||
}
|
||||
|
||||
cases := []preprocessCase{
|
||||
{
|
||||
TestImage: image.NewRGBA(image.Rect(0, 0, 10, 10)),
|
||||
ExpectedLen: 16 * 16 * 3 * 1,
|
||||
},
|
||||
{
|
||||
TestImage: image.NewRGBA(image.Rect(0, 0, 2000, 2000)),
|
||||
ExpectedLen: 1024 * 1024 * 3 * 1,
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
var buf bytes.Buffer
|
||||
err := png.Encode(&buf, c.TestImage)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
imgData, _, err := Preprocess(&buf)
|
||||
if err != nil {
|
||||
t.Fatalf("error processing: %q", err)
|
||||
}
|
||||
|
||||
switch len(imgData) {
|
||||
case 0:
|
||||
t.Errorf("no image data returned")
|
||||
case c.ExpectedLen:
|
||||
// ok
|
||||
default:
|
||||
t.Errorf("unexpected image data length: %d, expected: %d", len(imgData), c.ExpectedLen)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestPreprocessImages(t *testing.T) {
|
||||
for _, testFile := range []string{"flight.png", "sportsball.png"} {
|
||||
f, err := os.Open(testFile)
|
||||
if err != nil {
|
||||
t.Skipf("skipping test, no test image found at %s", testFile)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
imgData, _, err := Preprocess(f)
|
||||
if err != nil {
|
||||
t.Fatalf("error processing: %q", err)
|
||||
}
|
||||
|
||||
byteData := make([]byte, len(imgData)*4) // float32 is 4 bytes
|
||||
for i, f := range imgData {
|
||||
binary.LittleEndian.PutUint32(byteData[i*4:], math.Float32bits(f))
|
||||
}
|
||||
|
||||
outputPath := "processed_" + testFile + ".bin"
|
||||
err = os.WriteFile(outputPath, byteData, 0o644)
|
||||
if err != nil {
|
||||
t.Fatalf("error writing processed image: %q", err)
|
||||
}
|
||||
}
|
||||
}
|
@ -263,6 +263,10 @@ func (bpe BytePairEncoding) Encode(s string, addSpecial bool) ([]int32, error) {
|
||||
continue
|
||||
}
|
||||
|
||||
if id := bpe.vocab.Encode(pair.value); id < 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
merges[pair.a].runes = append(left.runes, right.runes...)
|
||||
merges[pair.b].runes = nil
|
||||
|
||||
|
@ -211,16 +211,10 @@ func filesForModel(path string) ([]string, error) {
|
||||
}
|
||||
|
||||
var files []string
|
||||
if st, _ := glob(filepath.Join(path, "model*.safetensors"), "application/octet-stream"); len(st) > 0 {
|
||||
if st, _ := glob(filepath.Join(path, "*.safetensors"), "application/octet-stream"); len(st) > 0 {
|
||||
// safetensors files might be unresolved git lfs references; skip if they are
|
||||
// covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors
|
||||
files = append(files, st...)
|
||||
} else if st, _ := glob(filepath.Join(path, "adapters.safetensors"), "application/octet-stream"); len(st) > 0 {
|
||||
// covers adapters.safetensors
|
||||
files = append(files, st...)
|
||||
} else if st, _ := glob(filepath.Join(path, "adapter_model.safetensors"), "application/octet-stream"); len(st) > 0 {
|
||||
// covers adapter_model.safetensors
|
||||
files = append(files, st...)
|
||||
} else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 {
|
||||
// pytorch files might also be unresolved git lfs references; skip if they are
|
||||
// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
|
||||
|
Loading…
x
Reference in New Issue
Block a user