diff --git a/convert/convert.go b/convert/convert.go index b1d59c000..26bc72cc2 100644 --- a/convert/convert.go +++ b/convert/convert.go @@ -248,10 +248,5 @@ func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error { return err } - // iterate through all ts and print the name - for _, t := range ts { - fmt.Print(t.Name(), "\n") - } - return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts)) } diff --git a/convert/convert_mistral.go b/convert/convert_mistral.go index cda1a6cfb..b0a79ca48 100644 --- a/convert/convert_mistral.go +++ b/convert/convert_mistral.go @@ -93,7 +93,6 @@ func (p *mistral3Model) Tensors(ts []Tensor) []ggml.Tensor { var out []ggml.Tensor for _, t := range ts { - fmt.Println("tensor", t.Name(), "shape", t.Shape(), "kind", t.Kind()) if strings.HasSuffix(t.Name(), "attn_q.weight") || strings.HasSuffix(t.Name(), "attn_k.weight") { t.SetRepacker(p.repack) diff --git a/model/models/llama/model.go b/model/models/llama/model.go index 44e4a2e92..5c173997b 100644 --- a/model/models/llama/model.go +++ b/model/models/llama/model.go @@ -13,9 +13,9 @@ import ( ) type Options struct { - hiddenSize, numHeads, numKVHeads, headDim int - eps, ropeBase, ropeScale float32 - ropeDim uint32 + hiddenSize, numHeads, numKVHeads int + eps, ropeBase, ropeScale float32 + ropeDim uint32 } type Model struct { @@ -37,8 +37,6 @@ func New(c ml.Config) (model.Model, error) { m := Model{ BytePairEncoding: model.NewBytePairEncoding( - // TODO: need to set this in the conversion for mistral: - // tokenizer.ggml.pretokenizer = [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+ c.String("tokenizer.ggml.pretokenizer", `(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`), &model.Vocabulary{ Values: c.Strings("tokenizer.ggml.tokens"), @@ -55,7 +53,6 @@ func New(c ml.Config) (model.Model, error) { hiddenSize: int(c.Uint("embedding_length")), numHeads: int(c.Uint("attention.head_count")), numKVHeads: int(c.Uint("attention.head_count_kv")), - headDim: int(c.Uint("attention.key_length")), eps: c.Float("attention.layer_norm_rms_epsilon"), ropeBase: c.Float("rope.freq_base"), ropeScale: c.Float("rope.freq_scale", 1), @@ -78,36 +75,24 @@ type SelfAttention struct { func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positionIDs ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor { batchSize := hiddenState.Dim(1) + headDim := opts.hiddenSize / opts.numHeads ropeType := uint32(0) - // Get head dimension - use explicit value if available, otherwise calculate - headDim := opts.headDim - if headDim == 0 { - headDim = opts.hiddenSize / opts.numHeads - } - // Query projection and reshape q := sa.Query.Forward(ctx, hiddenState) q = q.Reshape(ctx, headDim, opts.numHeads, batchSize) q = q.RoPE(ctx, positionIDs, sa.RopeFactors, opts.ropeDim, ropeType, opts.ropeBase, opts.ropeScale) - // Key projection and reshape k := sa.Key.Forward(ctx, hiddenState) k = k.Reshape(ctx, headDim, opts.numKVHeads, batchSize) k = k.RoPE(ctx, positionIDs, sa.RopeFactors, opts.ropeDim, ropeType, opts.ropeBase, opts.ropeScale) - // Value projection and reshape v := sa.Value.Forward(ctx, hiddenState) v = v.Reshape(ctx, headDim, opts.numKVHeads, batchSize) - // Attention computation scaleFactor := 1.0 / math.Sqrt(float64(headDim)) kqv := nn.Attention(ctx, q, k, v, scaleFactor, cache) + kqv = kqv.Reshape(ctx, opts.hiddenSize, batchSize) - // Reshape attention output for final projection - outputDim := headDim * opts.numHeads - kqv = kqv.Reshape(ctx, outputDim, batchSize) - - // Apply output projection return sa.Output.Forward(ctx, kqv) }