diff --git a/model/parsers/qwen3coder.go b/model/parsers/qwen3coder.go
index 0cff1ec15e..f44d7c8efd 100644
--- a/model/parsers/qwen3coder.go
+++ b/model/parsers/qwen3coder.go
@@ -11,6 +11,7 @@ import (
"strconv"
"strings"
"unicode"
+ "unicode/utf8"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/logutil"
@@ -204,12 +205,21 @@ func overlap(s, delim string) int {
}
func trailingWhitespaceLen(s string) int {
- for i := len(s) - 1; i >= 0; i-- {
- if !unicode.IsSpace(rune(s[i])) {
- return len(s) - i - 1
+ remaining := s // prefix of s not yet examined; shrinks from the right as trailing whitespace runes are consumed
+ total := 0 // trailing whitespace found so far, measured in bytes (not runes)
+ for len(remaining) > 0 {
+ r, size := utf8.DecodeLastRuneInString(remaining) // decode a whole rune so multi-byte whitespace (e.g. U+00A0, U+3000) is recognized
+ // An invalid UTF-8 sequence decodes as (utf8.RuneError, 1); treat it as non-whitespace and stop scanning.
+ if r == utf8.RuneError && size == 1 {
+ break
}
+ if !unicode.IsSpace(r) {
+ break
+ }
+ total += size // add the rune's full encoded width, so the result is a byte count
+ remaining = remaining[:len(remaining)-size] // drop the rune just counted and continue leftwards
}
- return len(s)
+ return total
}
type XMLFunctionCall struct {
diff --git a/model/parsers/qwen3coder_test.go b/model/parsers/qwen3coder_test.go
index 43823e6fc6..c77fe2d95f 100644
--- a/model/parsers/qwen3coder_test.go
+++ b/model/parsers/qwen3coder_test.go
@@ -166,6 +166,137 @@ func TestQwenParserStreaming(t *testing.T) {
},
},
},
+ {
+ desc: "unicode content",
+ steps: []step{
+ {
+ input: "你好 🌍testمرحبا",
+ wantEvents: []qwenEvent{
+ qwenEventContent{content: "你好 🌍"},
+ qwenEventRawToolCall{raw: "test"},
+ qwenEventContent{content: "مرحبا"},
+ },
+ },
+ },
+ },
+ {
+ desc: "arabic text handling",
+ steps: []step{
+ {
+ input: "مرحبا بالعالم",
+ wantEvents: []qwenEvent{qwenEventContent{content: "مرحبا بالعالم"}},
+ },
+ },
+ },
+ {
+ desc: "emoji passthrough",
+ steps: []step{
+ {
+ input: "✅",
+ wantEvents: []qwenEvent{qwenEventContent{content: "✅"}},
+ },
+ },
+ },
+ {
+ desc: "emoji after tool call",
+ steps: []step{
+ {
+ input: "test完成 ✅",
+ wantEvents: []qwenEvent{
+ qwenEventRawToolCall{raw: "test"},
+ qwenEventContent{content: "完成 ✅"},
+ },
+ },
+ },
+ },
+ {
+ desc: "unicode streaming with whitespace handling",
+ steps: []step{
+ {
+ input: "مرحبا",
+ wantEvents: []qwenEvent{
+ qwenEventContent{content: "مرحبا"},
+ },
+ },
+ {
+ input: " \n",
+ wantEvents: []qwenEvent{},
+ },
+ {
+ input: "世界",
+ wantEvents: []qwenEvent{
+ qwenEventContent{content: " \n世界"},
+ },
+ },
+ },
+ },
+ {
+ desc: "non-breaking space withheld across chunks",
+ steps: []step{
+ {
+ input: "Hello\u00a0",
+ wantEvents: []qwenEvent{
+ qwenEventContent{content: "Hello"},
+ },
+ },
+ {
+ input: "world",
+ wantEvents: []qwenEvent{
+ qwenEventContent{content: "\u00a0world"},
+ },
+ },
+ },
+ },
+ {
+ desc: "ideographic space before partial tool",
+ steps: []step{
+ {
+ input: "Hello\u3000abc",
+ wantEvents: []qwenEvent{},
+ },
+ {
+ input: "def",
+ wantEvents: []qwenEvent{
+ qwenEventRawToolCall{raw: "abc"},
+ qwenEventContent{content: "def"},
+ },
+ },
+ },
+ },
+ {
+ desc: "ideographic space before partial tool fakeout",
+ steps: []step{
+ {
+ input: "Hello\u3000abc",
+ wantEvents: []qwenEvent{
+ qwenEventContent{content: "\u3000abc"},
+ },
+ },
+ },
+ },
+ {
+ desc: "unicode with partial tool tag",
+ steps: []step{
+ {
+ input: "测试🎯 b and a < b"
},
},
},
+ {
+ name: "unicode in function names and parameters",
+ tools: []api.Tool{},
+ rawToolCall: `
+
+北京
+
+
+Hello! 你好! 🌟 مرحبا
+
+`,
+ wantToolCall: api.ToolCall{
+ Function: api.ToolCallFunction{
+ Name: "获取天气",
+ Arguments: map[string]any{
+ "城市": "北京",
+ "message": "Hello! 你好! 🌟 مرحبا",
+ },
+ },
+ },
+ },
}
for i, step := range steps {
@@ -360,6 +512,42 @@ ls && echo "a > b and a < b"
}
}
+// TestTrailingWhitespaceLenUnicode verifies that trailingWhitespaceLen reports
+// the length of trailing whitespace in bytes when that whitespace is encoded
+// as multi-byte UTF-8 runes.
+func TestTrailingWhitespaceLenUnicode(t *testing.T) {
+	type testCase struct {
+		name  string
+		input string
+		want  int
+	}
+
+	tests := []testCase{
+		{name: "ascii space", input: "Hello ", want: 1},
+		// U+00A0 occupies two bytes in UTF-8.
+		{name: "non-breaking space", input: "Hello\u00a0", want: 2},
+		// U+3000 occupies three bytes in UTF-8.
+		{name: "ideographic space", input: "Hello\u3000", want: 3},
+		// Two bytes for U+00A0 plus three for U+3000.
+		{name: "multiple runes of whitespace", input: "Hi\u00a0\u3000", want: 5},
+	}
+
+	for i := range tests {
+		tt := tests[i]
+		if got := trailingWhitespaceLen(tt.input); got != tt.want {
+			t.Errorf("%s: trailingWhitespaceLen(%q) = %d, want %d", tt.name, tt.input, got, tt.want)
+		}
+	}
+}
+
func TestQwenToolCallValueParsing(t *testing.T) {
cases := []struct {
desc string
@@ -867,6 +1055,8 @@ func TestTrailingWhitespaceLen(t *testing.T) {
{desc: "trailing whitespace with newlines", s: "abc \n", want: 2},
{desc: "only whitespace", s: " \n ", want: 4},
{desc: "leading whitespace doesn't count", s: " \n abc", want: 0},
+ {desc: "unicode with trailing space", s: "测试🎯 ", want: 1},
+ {desc: "unicode with trailing tab and newline", s: "مرحبا\t\n", want: 2},
}
for _, tc := range cases {
@@ -876,3 +1066,30 @@ func TestTrailingWhitespaceLen(t *testing.T) {
}
}
}
+
+// TestOverlapFunction exercises overlap with ASCII and multi-byte inputs.
+//
+// overlap is defined outside this hunk; the expectations below assume it
+// returns the byte length of the longest suffix of s that is also a prefix of
+// delim — TODO confirm against its definition.
+//
+// NOTE(review): the angle-bracket delimiter text had been stripped from this
+// table, leaving rows with an empty or missing delim but a non-zero want. The
+// "<tool>" delimiter and the "<to"/"<tool" suffixes are reconstructed so that
+// every row agrees with its description and expected value.
+func TestOverlapFunction(t *testing.T) {
+	cases := []struct {
+		desc  string
+		s     string
+		delim string
+		want  int
+	}{
+		{desc: "no overlap", s: "hello", delim: "<tool>", want: 0},
+		{desc: "full overlap", s: "<tool", delim: "<tool>", want: 5},
+		{desc: "partial overlap", s: "hello<to", delim: "<tool>", want: 3},
+		{desc: "unicode with partial overlap", s: "测试🎯<to", delim: "<tool>", want: 3},
+		{desc: "unicode string with no overlap", s: "مرحبا", delim: "<tool>", want: 0},
+		{desc: "unicode at boundary", s: "世界<", delim: "<tool>", want: 1},
+		// want is spelled with len(...) because the result is a byte count, not a rune count.
+		{desc: "unicode delimiter single rune", s: "hello🔧", delim: "🔧工具", want: len("🔧")},
+		{desc: "unicode delimiter multiple runes", s: "hello🔧工", delim: "🔧工具", want: len("🔧工")},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.desc, func(t *testing.T) {
+			got := overlap(tc.s, tc.delim)
+			if got != tc.want {
+				t.Errorf("overlap(%q, %q) = %d, want %d", tc.s, tc.delim, got, tc.want)
+			}
+		})
+	}
+}