From c00fa9cc2be428daeb3dd7cb67a86ff3a9d85cbd Mon Sep 17 00:00:00 2001
From: Michael Yang
Date: Mon, 6 Oct 2025 14:55:55 -0700
Subject: [PATCH] convert: split gate_up bias

---
Review note: in splitDim the default slice is assigned to the outer
`slice` variable with `=`. Declaring it with `:=` inside the `if` would
shadow the outer variable and leave it empty whenever split.slices is
unset. The post-image index hash for convert/tensor.go has not been
recomputed after this change.

 convert/convert_gptoss.go | 11 +++++++++++
 convert/tensor.go         | 12 ++++++++----
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/convert/convert_gptoss.go b/convert/convert_gptoss.go
index 2048b18bed..478f29f46e 100644
--- a/convert/convert_gptoss.go
+++ b/convert/convert_gptoss.go
@@ -85,6 +85,17 @@ func (m *gptossModel) Tensors(ts []Tensor) []*ggml.Tensor {
 			case "scales":
 				mxfp4s[name].scales = t
 			}
+		} else if strings.HasSuffix(t.Name(), "gate_up_exps.bias") {
+			out = append(out, slices.Collect(splitDim(t, 1,
+				split{
+					Replacer: strings.NewReplacer("gate_up_exps", "gate_exps"),
+					slices:   []tensor.Slice{nil, tensor.S(0, int(t.Shape()[1]), 2)},
+				},
+				split{
+					Replacer: strings.NewReplacer("gate_up_exps", "up_exps"),
+					slices:   []tensor.Slice{nil, tensor.S(1, int(t.Shape()[1]), 2)},
+				},
+			))...)
 		} else {
 			out = append(out, &ggml.Tensor{
 				Name:     t.Name(),
diff --git a/convert/tensor.go b/convert/tensor.go
index c9565ed4e4..11a171db03 100644
--- a/convert/tensor.go
+++ b/convert/tensor.go
@@ -16,7 +16,8 @@ import (
 
 type split struct {
 	*strings.Replacer
-	dim int
+	dim    int
+	slices []tensor.Slice
 
 	// fn is an optional function to apply to the tensor after slicing
 	fn func(tensor.Tensor) (tensor.Tensor, error)
@@ -32,9 +33,12 @@ func splitDim(t Tensor, dim int, splits ...split) iter.Seq[*ggml.Tensor] {
 			shape := slices.Clone(t.Shape())
 			shape[dim] = cmp.Or(uint64(split.dim), shape[dim]/uint64(len(splits)))
 
-			slice := slices.Repeat([]tensor.Slice{nil}, len(shape))
-			slice[dim] = tensor.S(offset, offset+int(shape[dim]))
-			offset += int(shape[dim])
+			slice := split.slices
+			if len(slice) == 0 {
+				slice = slices.Repeat([]tensor.Slice{nil}, len(shape))
+				slice[dim] = tensor.S(offset, offset+int(shape[dim]))
+				offset += int(shape[dim])
+			}
 
 			t.SetRepacker(func(_ string, data []float32, shape []uint64) ([]float32, error) {
 				dims := make([]int, len(shape))