Add native support for converting Qwen2-family models (including Qwen2.5) from safetensors to GGUF format so they can be run.
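The new converter is wired in through ConvertModel's architecture switch below: the "Qwen2ForCausalLM" case selects the qwen2Model converter.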
package convert

import (
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"log/slog"
	"strings"

	"github.com/ollama/ollama/llm"
)

type ModelParameters struct {
	Architectures []string `json:"architectures"`
	VocabSize     uint32   `json:"vocab_size"`
}

type AdapterParameters struct {
	Alpha          uint32 `json:"lora_alpha"`
	LoraLayers     uint32 `json:"lora_layers"`
	LoraParameters struct {
		Rank  uint32  `json:"rank"`
		Alpha float32 `json:"alpha"`
		Scale float32 `json:"scale"`
	} `json:"lora_parameters"`
}

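// parseAdapterExample is an illustrative sketch (a hypothetical helper, not
// part of the upstream package): it decodes a minimal PEFT-style
// adapter_config.json into AdapterParameters. The values are made up for the
// example; see the struct tags above for the accepted fields.
func parseAdapterExample() (AdapterParameters, error) {
	example := []byte(`{
		"lora_alpha": 16,
		"lora_parameters": {"rank": 8, "alpha": 16.0, "scale": 2.0}
	}`)

	var p AdapterParameters
	err := json.Unmarshal(example, &p)
	// On success: p.Alpha == 16, p.LoraParameters.Rank == 8.
	return p, err
}
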
func (ModelParameters) KV(t *Tokenizer) llm.KV {
	kv := llm.KV{
		"general.file_type":            uint32(1),
		"general.quantization_version": uint32(2),
		"tokenizer.ggml.pre":           t.Pre,
		"tokenizer.ggml.model":         t.Vocabulary.Model,
		"tokenizer.ggml.tokens":        t.Vocabulary.Tokens,
		"tokenizer.ggml.scores":        t.Vocabulary.Scores,
		"tokenizer.ggml.token_type":    t.Vocabulary.Types,
	}

	if len(t.Merges) > 0 {
		kv["tokenizer.ggml.merges"] = t.Merges
	}

	if t.Template != "" {
		kv["tokenizer.chat_template"] = t.Template
	}

	for _, sv := range t.SpecialVocabulary {
		kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID)
		kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken
	}

	return kv
}

func (p AdapterParameters) KV() llm.KV {
	var alpha float32
	if p.LoraParameters.Alpha == 0 {
		alpha = float32(p.Alpha)
	} else {
		alpha = p.LoraParameters.Alpha
	}

	kv := llm.KV{
		"adapter.lora.alpha": alpha,
		"adapter.type":       "lora",
		"general.file_type":  uint32(1),
		"general.type":       "adapter",
		"general.version":    "v0.2",
	}

	return kv
}

func (ModelParameters) specialTokenTypes() []string {
	return []string{
		"bos", "eos", "unk", "sep", "pad", "cls", "mask",
	}
}

func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
	return llm.WriteGGUF(ws, kv, ts)
}

func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
	return llm.WriteGGUF(ws, kv, ts)
}

type ModelConverter interface {
	// KV maps parameters to LLM key-values
	KV(*Tokenizer) llm.KV
	// Tensors maps input tensors to LLM tensors. Model-specific modifications can be done here.
	Tensors([]Tensor) []llm.Tensor
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string

	// specialTokenTypes returns any special token types the model uses
	specialTokenTypes() []string
	// writeFile writes the model to the provided io.WriteSeeker
	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
}

type moreParser interface {
	parseMore(fs.FS) error
}

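// exampleModel is an illustrative sketch only (a hypothetical converter, not
// part of the upstream package): it shows the minimal surface a new
// architecture has to provide to satisfy ModelConverter. Real converters such
// as qwen2Model follow the same pattern: embed ModelParameters to inherit
// specialTokenTypes and writeFile, then layer architecture-specific key-values
// on top. The JSON field and key names here are assumptions for illustration.
type exampleModel struct {
	ModelParameters
	MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
}

var _ ModelConverter = (*exampleModel)(nil)

func (m *exampleModel) KV(t *Tokenizer) llm.KV {
	// Start from the shared tokenizer key-values, then add model-specific ones.
	kv := m.ModelParameters.KV(t)
	kv["general.architecture"] = "example"
	kv["example.context_length"] = m.MaxPositionEmbeddings
	return kv
}

func (m *exampleModel) Tensors(ts []Tensor) []llm.Tensor {
	// Pass tensors through unchanged; real converters may repack or split here.
	out := make([]llm.Tensor, 0, len(ts))
	for _, t := range ts {
		out = append(out, llm.Tensor{
			Name:     t.Name(),
			Kind:     t.Kind(),
			Shape:    t.Shape(),
			WriterTo: t,
		})
	}
	return out
}

func (m *exampleModel) Replacements() []string {
	// Rename Hugging Face tensor-name prefixes to their GGUF counterparts.
	return []string{"model.layers", "blk"}
}
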
type AdapterConverter interface {
	// KV maps parameters to LLM key-values
	KV(llm.KV) llm.KV
	// Tensors maps input tensors to LLM tensors. Adapter-specific modifications can be done here.
	Tensors([]Tensor) []llm.Tensor
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string

	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
}

func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
	bts, err := fs.ReadFile(fsys, "adapter_config.json")
	if err != nil {
		return err
	}

	var p AdapterParameters
	if err := json.Unmarshal(bts, &p); err != nil {
		return err
	}

	arch, ok := baseKV["general.architecture"]
	if !ok {
		return errors.New("architecture not set for the base model")
	}

	var conv AdapterConverter
	switch arch {
	case "llama":
		conv = &llamaAdapter{}
	case "gemma2":
		conv = &gemma2Adapter{}
	default:
		return errors.New("unsupported architecture")
	}

	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
	if err != nil {
		return err
	}

	if err := json.Unmarshal(bts, conv); err != nil {
		return err
	}

	return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
}

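// Note: ConvertAdapter above (and ConvertModel below) decodes the same config
// bytes twice: once into the generic parameter struct for the shared fields,
// and once into the architecture-specific converter so it can pick up any
// extra fields that converter declares.
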
// ConvertModel writes an Ollama-compatible model to the provided io.WriteSeeker based on configurations
// and files it finds in the input path.
// Supported input model formats include safetensors.
// Supported input tokenizer files include tokenizer.json (preferred) and tokenizer.model.
func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
	bts, err := fs.ReadFile(fsys, "config.json")
	if err != nil {
		return err
	}

	var p ModelParameters
	if err := json.Unmarshal(bts, &p); err != nil {
		return err
	}

	if len(p.Architectures) < 1 {
		return errors.New("unknown architecture")
	}

	var conv ModelConverter
	switch p.Architectures[0] {
	case "LlamaForCausalLM", "MistralForCausalLM":
		conv = &llamaModel{}
	case "MixtralForCausalLM":
		conv = &mixtralModel{}
	case "GemmaForCausalLM":
		conv = &gemmaModel{}
	case "Gemma2ForCausalLM":
		conv = &gemma2Model{}
	case "Phi3ForCausalLM":
		conv = &phi3Model{}
	case "Qwen2ForCausalLM":
		conv = &qwen2Model{}
	case "BertModel":
		conv = &bertModel{}
	default:
		return errors.New("unsupported architecture")
	}

	if err := json.Unmarshal(bts, conv); err != nil {
		return err
	}

	if t, ok := conv.(moreParser); ok {
		if err := t.parseMore(fsys); err != nil {
			return err
		}
	}

	t, err := parseTokenizer(fsys, conv.specialTokenTypes())
	if err != nil {
		return err
	}

	vocabSize := int(p.VocabSize)
	switch {
	case vocabSize > len(t.Vocabulary.Tokens):
		slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", vocabSize, "actual", len(t.Vocabulary.Tokens))
		for i := range vocabSize - len(t.Vocabulary.Tokens) {
			t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))
			t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
			t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
		}
	case vocabSize < len(t.Vocabulary.Tokens):
		return fmt.Errorf("vocabulary is larger than expected '%d' instead of '%d'", len(t.Vocabulary.Tokens), vocabSize)
	default:
		slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
	}

	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
	if err != nil {
		return err
	}

	return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))
}
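// convertToGGUF is an illustrative sketch (a hypothetical helper, not part of
// the upstream package; it would additionally need "os" imported). It shows
// how ConvertModel is typically driven: point it at a directory containing
// config.json, tokenizer files, and *.safetensors shards, and write a single
// GGUF file. *os.File satisfies io.WriteSeeker.
func convertToGGUF(inputDir, outputPath string) error {
	out, err := os.Create(outputPath)
	if err != nil {
		return err
	}
	defer out.Close()

	// os.DirFS exposes the model directory as the fs.FS that ConvertModel
	// walks to find its configuration, tokenizer, and tensor files.
	return ConvertModel(os.DirFS(inputDir), out)
}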