From 763bb65dbb88004cd046c8acc0c8e889816e1828 Mon Sep 17 00:00:00 2001 From: Jeffrey Morgan Date: Thu, 30 May 2024 21:43:30 -0700 Subject: [PATCH] use `int32_t` for call to tokenize (#4738) * use `int32_t` for call to tokenize * variable naming * cleanup * fix crash --- llm/llm.go | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/llm/llm.go b/llm/llm.go index ead691f06..4492d39ff 100644 --- a/llm/llm.go +++ b/llm/llm.go @@ -63,12 +63,27 @@ func (llm *llamaModel) Tokenize(s string) []int { cs := C.CString(s) defer C.free(unsafe.Pointer(cs)) - tokens := make([]int, len(s)+2) - if n := C.llama_tokenize(llm.m, cs, C.int(len(s)), (*C.llama_token)(unsafe.Pointer(&tokens[0])), C.int(len(s)+2), false, true); n > 0 { - return tokens[:n] + ltokens := make([]C.llama_token, len(s)+2) + n := C.llama_tokenize( + llm.m, + cs, + C.int32_t(len(s)), + &ltokens[0], + C.int32_t(len(ltokens)), + false, + true, + ) + + if n < 0 { + return nil } - return nil + tokens := make([]int, n) + for i := 0; i < int(n); i++ { + tokens[i] = int(ltokens[i]) + } + + return tokens } func (llm *llamaModel) Detokenize(i32s []int) string {