From 20e35938630e633a5f40fd2c8b097b0dbfbda8d9 Mon Sep 17 00:00:00 2001 From: jmorganca Date: Tue, 11 Mar 2025 18:40:55 +0100 Subject: [PATCH] model: validate left and right pairs before merging them --- model/process_text_spm.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/model/process_text_spm.go b/model/process_text_spm.go index 9d61746a2..68e3ed015 100644 --- a/model/process_text_spm.go +++ b/model/process_text_spm.go @@ -169,6 +169,10 @@ func (spm SentencePieceModel) Encode(s string, addSpecial bool) ([]int32, error) continue } + if id := spm.vocab.Encode(string(left.runes) + string(right.runes)); id < 0 { + continue + } + merges[pair.a].runes = append(left.runes, right.runes...) merges[pair.b].runes = nil merges[pair.a].n = right.n