From d355d2020fcfc54c375eb697b7873742c3851881 Mon Sep 17 00:00:00 2001
From: Patrick Devine
Date: Wed, 8 May 2024 16:07:46 -0700
Subject: [PATCH] add fixes for llama

---
 cmd/cmd.go             |  2 +-
 convert/convert.go     |  4 +---
 convert/llama.go       | 33 +++++++++++++++++++--------------
 convert/safetensors.go |  2 ++
 convert/torch.go       | 40 +++++++++++++++++++++++++++++++-------
 5 files changed, 56 insertions(+), 25 deletions(-)

diff --git a/cmd/cmd.go b/cmd/cmd.go
index f79f8b97e..5d919d9a3 100644
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -208,7 +208,7 @@ func tempZipFiles(path string) (string, error) {
 		// pytorch files might also be unresolved git lfs references; skip if they are
 		// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
 		files = append(files, pt...)
-	} else if pt, _ := glob(filepath.Join(path, "consolidated*.pth"), "application/octet-stream"); len(pt) > 0 {
+	} else if pt, _ := glob(filepath.Join(path, "consolidated*.pth"), "application/zip"); len(pt) > 0 {
 		// pytorch files might also be unresolved git lfs references; skip if they are
 		// covers consolidated.x.pth, consolidated.pth
 		files = append(files, pt...)
diff --git a/convert/convert.go b/convert/convert.go
index 899c8c44c..9a05fb529 100644
--- a/convert/convert.go
+++ b/convert/convert.go
@@ -74,11 +74,9 @@ func GetModelFormat(dirname string) (ModelFormat, error) {
 	}
 
 	for _, fn := range files {
-		slog.Debug(fmt.Sprintf("file = %s", fn))
 		if strings.HasSuffix(fn, ".safetensors") {
 			return &SafetensorFormat{}, nil
-			//} else if strings.HasSuffix(fn, ".bin") {
-		} else if strings.HasSuffix(fn, ".pth") {
+		} else if strings.HasSuffix(fn, ".bin") || strings.HasSuffix(fn, ".pth") {
 			slog.Debug("model is torch")
 			return &TorchFormat{}, nil
 		}
diff --git a/convert/llama.go b/convert/llama.go
index 8cb162e7f..9fdcd02b4 100644
--- a/convert/llama.go
+++ b/convert/llama.go
@@ -23,12 +23,24 @@ type LlamaModel struct {
 }
 
 func llamaTorchLayerHandler(w io.Writer, r torchWriterTo) error {
-	slog.Debug(fmt.Sprintf("repacking layer '%s'", r.t.Name))
-	data := r.storage.(*pytorch.HalfStorage).Data
-	tData := make([]uint16, len(data))
-	for cnt, v := range data {
-		tData[cnt] = uint16(float16.Fromfloat32(v))
+	var tData []uint16
+	switch storage := r.storage.(type) {
+	case *pytorch.HalfStorage:
+		data := storage.Data
+		tData = make([]uint16, len(data))
+		for cnt, v := range data {
+			tData[cnt] = uint16(float16.Fromfloat32(v))
+		}
+	case *pytorch.BFloat16Storage:
+		data := storage.Data
+		tData = make([]uint16, len(data))
+
+		for cnt, v := range data {
+			tData[cnt] = uint16(float16.Fromfloat32(v))
+		}
+	default:
+		return fmt.Errorf("unknown storage type %T for torch", storage)
 	}
 
 	var err error
@@ -44,8 +56,6 @@ func llamaTorchLayerHandler(w io.Writer, r torchWriterTo) error {
 		return fmt.Errorf("unknown layer type")
 	}
 
-	slog.Debug(fmt.Sprintf("heads = %d", heads))
-
 	tData, err = llamaRepack(tData, int(heads), r.t.Shape)
 	if err != nil {
 		return err
@@ -106,7 +116,6 @@ func (m *LlamaModel) GetTensors() error {
 	for _, l := range t {
 		matches := re.FindAllStringSubmatch(l.Name, -1)
 		if len(matches) > 0 {
-			slog.Debug(fmt.Sprintf("setting handler for: %s", l.Name))
 			switch m.Format.(type) {
 			case *TorchFormat:
 				wt := l.WriterTo.(torchWriterTo)
@@ -182,10 +191,8 @@ func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {
 		"llama.attention.head_count":              uint32(m.Params.AttentionHeads),
 		"llama.attention.head_count_kv":           uint32(m.Params.KeyValHeads),
 		"llama.attention.layer_norm_rms_epsilon":  float32(m.Params.NormEPS),
//"general.file_type": uint32(1), - "general.file_type": uint32(2), - //"tokenizer.ggml.model": "llama", - "tokenizer.ggml.model": "gpt2", + "general.file_type": uint32(2), + "tokenizer.ggml.model": "gpt2", "tokenizer.ggml.tokens": m.Vocab.Tokens, "tokenizer.ggml.token_type": m.Vocab.Types, @@ -193,8 +200,6 @@ func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error { "tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID), "tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID), "tokenizer.ggml.unknown_token_id": uint32(0), - //"tokenizer.ggml.add_bos_token": true, - //"tokenizer.ggml.add_eos_token": false, } if len(m.Vocab.Merges) > 0 { diff --git a/convert/safetensors.go b/convert/safetensors.go index 64aaf8669..b52a048df 100644 --- a/convert/safetensors.go +++ b/convert/safetensors.go @@ -131,6 +131,8 @@ func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params) shape[i] = uint64(data.Shape[i]) } + slog.Debug(fmt.Sprintf("'%45s': '%30s' %10d [%#v]", k, ggufName, size, data.Shape)) + t := llm.Tensor{ Name: ggufName, Kind: kind, diff --git a/convert/torch.go b/convert/torch.go index 0ad10c0e6..803827ba4 100644 --- a/convert/torch.go +++ b/convert/torch.go @@ -33,11 +33,15 @@ type TorchFormat struct{} func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) { slog.Debug("getting torch tensors") - //files, err := filepath.Glob(filepath.Join(dirpath, "pytorch_model-*.bin")) - files, err := filepath.Glob(filepath.Join(dirpath, "consolidatedr.*.pth")) + var files []string + var err error + files, err = filepath.Glob(filepath.Join(dirpath, "consolidated.*.pth")) if err != nil { - slog.Error("didn't find any torch files") - return nil, err + files, err = filepath.Glob(filepath.Join(dirpath, "pytorch_model-*.bin")) + if err != nil { + slog.Error("didn't find any torch files") + return nil, err + } } var offset uint64 @@ -78,7 +82,7 @@ func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, slog.Error(err.Error()) return nil, err } - slog.Debug(fmt.Sprintf("finding name for '%s' -> '%s'", k.(string), ggufName)) + slog.Debug(fmt.Sprintf("'%35s': '%30s' %10d [%#v]", k.(string), ggufName, size, tshape)) shape := []uint64{0, 0, 0, 0} for i := range tshape { @@ -236,7 +240,7 @@ func (r torchWriterTo) WriteTo(w io.Writer) (n int64, err error) { return 0, r.handler(w, r) } - switch r.storage.(type) { + switch storage := r.storage.(type) { case *pytorch.FloatStorage: slog.Warn(fmt.Sprintf("unexpected storage found for layer '%s'; skipping", r.t.Name)) return 0, nil @@ -259,6 +263,28 @@ func (r torchWriterTo) WriteTo(w io.Writer) (n int64, err error) { return 0, err } } + case *pytorch.BFloat16Storage: + data := r.storage.(*pytorch.BFloat16Storage).Data + switch r.t.Kind { + case 0: + if err = binary.Write(w, r.bo, data); err != nil { + return 0, err + } + case 1: + tData := make([]uint16, len(data)) + + for cnt, v := range data { + tData[cnt] = uint16(float16.Fromfloat32(v)) + } + + if err = binary.Write(w, r.bo, tData); err != nil { + return 0, err + } + default: + return 0, fmt.Errorf("unknown storage kind: %d", r.t.Kind) + } + default: + return 0, fmt.Errorf("unknown storage type: %T", storage) } return 0, nil