From dae3ea266ee8dbdbe3df43b5baf6b66fe06d8891 Mon Sep 17 00:00:00 2001 From: nicole pardal Date: Mon, 3 Nov 2025 14:43:17 -0800 Subject: [PATCH] simplified eog handling logic --- runner/llamarunner/runner.go | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/runner/llamarunner/runner.go b/runner/llamarunner/runner.go index c8b8b6e7ec..889ea970b0 100644 --- a/runner/llamarunner/runner.go +++ b/runner/llamarunner/runner.go @@ -136,10 +136,8 @@ func (s *Server) NewSequence(prompt string, images []llm.ImageData, params NewSe // TODO: EOG token handling should be moved to tokenizer if params.embedding { - // If the original prompt ended with an EOG token, add it back after truncation - lastIsEOG := false - - eogToken := 0 + // make sure to end the truncated sequence with the eog token if one was originally provided + eogToken := -1 if len(inputs) > 0 && s.model.TokenIsEog(inputs[len(inputs)-1].token) { eogToken = inputs[len(inputs)-1].token @@ -148,19 +146,14 @@ func (s *Server) NewSequence(prompt string, images []llm.ImageData, params NewSe // If embedding only, truncate to the maximum context length - 1 (to account for BOS token) newLimit := s.cache.numCtx - 1 - if s.model.TokenIsEog(inputs[len(inputs)-1].token) && !s.model.TokenIsEog(inputs[newLimit-1].token) { - lastIsEOG = true - newLimit-- - } - if newLimit <= 0 { return nil, fmt.Errorf("input after truncation exceeds maximum context length") } newInputs = inputs[:newLimit] - if lastIsEOG { - newInputs = append(newInputs, input{token: eogToken}) + if eogToken >= 0 { + newInputs[len(newInputs)-1] = input{token: eogToken} } } else { // Otherwise, truncate in the middle