model: expose vocabulary to use for sampling

This commit is contained in:
ParthSareen 2025-04-02 14:30:04 -07:00
parent 92006a31a2
commit 486a694852
2 changed files with 5 additions and 0 deletions

View File

@@ -32,6 +32,7 @@ type TextProcessor interface {
Encode(s string, addSpecial bool) ([]int32, error)
Decode([]int32) (string, error)
Is(int32, Special) bool
Vocabulary() *Vocabulary
}
type Vocabulary struct {

View File

@@ -17,6 +17,10 @@ type SentencePieceModel struct {
// Compile-time assertion that *SentencePieceModel implements TextProcessor;
// the build fails here if a required method is missing.
var _ TextProcessor = (*SentencePieceModel)(nil)
// Vocabulary returns the vocabulary held by the model.
//
// The stored pointer is handed back as-is (no copy is made), so the caller
// refers to the same Vocabulary value as the model.
func (spm SentencePieceModel) Vocabulary() *Vocabulary {
	vocab := spm.vocab
	return vocab
}
func NewSentencePieceModel(vocab *Vocabulary) SentencePieceModel {
slog.Debug("Tokens", "num tokens", len(vocab.Values), "vals", vocab.Values[:5], "scores", vocab.Scores[:5], "types", vocab.Types[:5])