From fc5fb09f514758ae9f59b11632c3d8f32e951d49 Mon Sep 17 00:00:00 2001 From: Devon Rifkin Date: Tue, 19 Aug 2025 18:34:49 -0700 Subject: [PATCH] model: fix boundary in bpe 0x007e is a tilde and was getting adjusted (+0x00a2) to 0x0120 in the encode, but then in the decode it was getting adjusted down (-0x0100) to 0x0020 (a space). The lower boundary of the +0x00a2 case has been raised from 0x007e to 0x007f to fix this. Fixes: #11966 --- model/bytepairencoding.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/bytepairencoding.go b/model/bytepairencoding.go index 7ade497daa..e4083dfceb 100644 --- a/model/bytepairencoding.go +++ b/model/bytepairencoding.go @@ -109,7 +109,7 @@ func (bpe BytePairEncoding) Encode(s string, addSpecial bool) ([]int32, error) { r = 0x0143 case r <= 0x0020: r = r + 0x0100 - case r >= 0x007e && r <= 0x00a0: + case r >= 0x007f && r <= 0x00a0: r = r + 0x00a2 }