From 631fecc6d9fe644a793a9ca185c080db4fea77e7 Mon Sep 17 00:00:00 2001 From: Patrick Devine Date: Fri, 7 Mar 2025 11:20:55 -0800 Subject: [PATCH] temporary workaround for converting spm --- convert/tokenizer_spm.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/convert/tokenizer_spm.go b/convert/tokenizer_spm.go index d8a012c08..340c3d581 100644 --- a/convert/tokenizer_spm.go +++ b/convert/tokenizer_spm.go @@ -47,6 +47,12 @@ func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) { v.Types = append(v.Types, int32(t)) default: tt := int32(sentencepiece.ModelProto_SentencePiece_NORMAL) + + // temporary fix to handle gemma3 broken configs + if slices.Contains([]string{"", ""}, piece.GetPiece()) { + tt = int32(sentencepiece.ModelProto_SentencePiece_CONTROL) + } + for _, t := range ast { if t.Content == piece.GetPiece() { tt = int32(sentencepiece.ModelProto_SentencePiece_CONTROL)