From f46a4b07a32c6b4d16f5c51abda714ccbff8fddf Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Fri, 7 Feb 2025 14:07:00 -0800 Subject: [PATCH] model: benchmark bpe split --- model/process_text_test.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/model/process_text_test.go b/model/process_text_test.go index d03fafb79..cad1f94ff 100644 --- a/model/process_text_test.go +++ b/model/process_text_test.go @@ -209,7 +209,7 @@ func TestLlama(t *testing.T) { }) } -func Benchmark(b *testing.B) { +func BenchmarkBytePairEncoding(b *testing.B) { tokenizer := llama(b) bts, err := os.ReadFile(filepath.Join("testdata", "war-and-peace.txt")) if err != nil { @@ -243,5 +243,12 @@ func Benchmark(b *testing.B) { } } }) + + b.Run("split"+strconv.Itoa(n), func(b *testing.B) { + b.ResetTimer() + for range b.N { + slices.Collect(tokenizer.split(string(bts))) + } + }) } }