model: validate left and right pairs before merging them

This commit is contained in:
jmorganca 2025-03-11 18:40:55 +01:00 committed by Michael Yang
parent 63a394068c
commit 20e3593863

View File

@@ -169,6 +169,10 @@ func (spm SentencePieceModel) Encode(s string, addSpecial bool) ([]int32, error)
continue
}
if id := spm.vocab.Encode(string(left.runes) + string(right.runes)); id < 0 {
continue
}
merges[pair.a].runes = append(left.runes, right.runes...)
merges[pair.b].runes = nil
merges[pair.a].n = right.n