mirror of
https://github.com/ollama/ollama.git
synced 2025-11-11 19:06:44 +01:00
automatically set num_keep if num_keep < 0
num_keep defines how many tokens to keep in the context when truncating inputs. if left to its default value of -1, the server will calculate num_keep to be the left of the system instructions
This commit is contained in:
@@ -78,6 +78,25 @@ func GenerateHandler(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
if opts.NumKeep < 0 {
|
||||
promptWithSystem, err := model.Prompt(api.GenerateRequest{})
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
promptNoSystem, err := model.Prompt(api.GenerateRequest{Context: []int{0}})
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
tokensWithSystem := llm.Encode(promptWithSystem)
|
||||
tokensNoSystem := llm.Encode(promptNoSystem)
|
||||
|
||||
llm.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
|
||||
}
|
||||
|
||||
loaded.llm = llm
|
||||
loaded.digest = model.Digest
|
||||
loaded.options = opts
|
||||
|
||||
Reference in New Issue
Block a user