simplified EOG handling logic

This commit is contained in:
nicole pardal
2025-11-03 14:43:17 -08:00
parent 8e147ce450
commit dae3ea266e

View File

@@ -136,10 +136,8 @@ func (s *Server) NewSequence(prompt string, images []llm.ImageData, params NewSe
// TODO: EOG token handling should be moved to tokenizer // TODO: EOG token handling should be moved to tokenizer
if params.embedding { if params.embedding {
// If the original prompt ended with an EOG token, add it back after truncation // make sure to end the truncated sequence with the eog token if one was originally provided
lastIsEOG := false eogToken := -1
eogToken := 0
if len(inputs) > 0 && s.model.TokenIsEog(inputs[len(inputs)-1].token) { if len(inputs) > 0 && s.model.TokenIsEog(inputs[len(inputs)-1].token) {
eogToken = inputs[len(inputs)-1].token eogToken = inputs[len(inputs)-1].token
@@ -148,19 +146,14 @@ func (s *Server) NewSequence(prompt string, images []llm.ImageData, params NewSe
// If embedding only, truncate to the maximum context length - 1 (to account for BOS token) // If embedding only, truncate to the maximum context length - 1 (to account for BOS token)
newLimit := s.cache.numCtx - 1 newLimit := s.cache.numCtx - 1
if s.model.TokenIsEog(inputs[len(inputs)-1].token) && !s.model.TokenIsEog(inputs[newLimit-1].token) {
lastIsEOG = true
newLimit--
}
if newLimit <= 0 { if newLimit <= 0 {
return nil, fmt.Errorf("input after truncation exceeds maximum context length") return nil, fmt.Errorf("input after truncation exceeds maximum context length")
} }
newInputs = inputs[:newLimit] newInputs = inputs[:newLimit]
if lastIsEOG { if eogToken >= 0 {
newInputs = append(newInputs, input{token: eogToken}) newInputs[len(newInputs)-1] = input{token: eogToken}
} }
} else { } else {
// Otherwise, truncate in the middle // Otherwise, truncate in the middle