mirror of
https://github.com/ollama/ollama.git
synced 2025-11-11 23:47:28 +01:00
convert: split gate_up bias
This commit is contained in:
committed by
Michael Yang
parent
df411c4b02
commit
c00fa9cc2b
@@ -85,6 +85,17 @@ func (m *gptossModel) Tensors(ts []Tensor) []*ggml.Tensor {
|
|||||||
case "scales":
|
case "scales":
|
||||||
mxfp4s[name].scales = t
|
mxfp4s[name].scales = t
|
||||||
}
|
}
|
||||||
|
} else if strings.HasSuffix(t.Name(), "gate_up_exps.bias") {
|
||||||
|
out = append(out, slices.Collect(splitDim(t, 1,
|
||||||
|
split{
|
||||||
|
Replacer: strings.NewReplacer("gate_up_exps", "gate_exps"),
|
||||||
|
slices: []tensor.Slice{nil, tensor.S(0, int(t.Shape()[1]), 2)},
|
||||||
|
},
|
||||||
|
split{
|
||||||
|
Replacer: strings.NewReplacer("gate_up_exps", "up_exps"),
|
||||||
|
slices: []tensor.Slice{nil, tensor.S(1, int(t.Shape()[1]), 2)},
|
||||||
|
},
|
||||||
|
))...)
|
||||||
} else {
|
} else {
|
||||||
out = append(out, &ggml.Tensor{
|
out = append(out, &ggml.Tensor{
|
||||||
Name: t.Name(),
|
Name: t.Name(),
|
||||||
|
|||||||
@@ -16,7 +16,8 @@ import (
|
|||||||
|
|
||||||
type split struct {
|
type split struct {
|
||||||
*strings.Replacer
|
*strings.Replacer
|
||||||
dim int
|
dim int
|
||||||
|
slices []tensor.Slice
|
||||||
|
|
||||||
// fn is an optional function to apply to the tensor after slicing
|
// fn is an optional function to apply to the tensor after slicing
|
||||||
fn func(tensor.Tensor) (tensor.Tensor, error)
|
fn func(tensor.Tensor) (tensor.Tensor, error)
|
||||||
@@ -32,9 +33,12 @@ func splitDim(t Tensor, dim int, splits ...split) iter.Seq[*ggml.Tensor] {
|
|||||||
shape := slices.Clone(t.Shape())
|
shape := slices.Clone(t.Shape())
|
||||||
shape[dim] = cmp.Or(uint64(split.dim), shape[dim]/uint64(len(splits)))
|
shape[dim] = cmp.Or(uint64(split.dim), shape[dim]/uint64(len(splits)))
|
||||||
|
|
||||||
slice := slices.Repeat([]tensor.Slice{nil}, len(shape))
|
slice := split.slices
|
||||||
slice[dim] = tensor.S(offset, offset+int(shape[dim]))
|
if len(slice) == 0 {
|
||||||
offset += int(shape[dim])
|
slice := slices.Repeat([]tensor.Slice{nil}, len(shape))
|
||||||
|
slice[dim] = tensor.S(offset, offset+int(shape[dim]))
|
||||||
|
offset += int(shape[dim])
|
||||||
|
}
|
||||||
|
|
||||||
t.SetRepacker(func(_ string, data []float32, shape []uint64) ([]float32, error) {
|
t.SetRepacker(func(_ string, data []float32, shape []uint64) ([]float32, error) {
|
||||||
dims := make([]int, len(shape))
|
dims := make([]int, len(shape))
|
||||||
|
|||||||
Reference in New Issue
Block a user