mirror of
https://github.com/ollama/ollama.git
synced 2025-11-13 04:36:41 +01:00
cleanup passing in harmony flag and add generate support
This commit is contained in:
@@ -31,7 +31,6 @@ import (
|
|||||||
"github.com/ollama/ollama/envconfig"
|
"github.com/ollama/ollama/envconfig"
|
||||||
"github.com/ollama/ollama/format"
|
"github.com/ollama/ollama/format"
|
||||||
"github.com/ollama/ollama/fs/ggml"
|
"github.com/ollama/ollama/fs/ggml"
|
||||||
"github.com/ollama/ollama/harmony"
|
|
||||||
"github.com/ollama/ollama/llama"
|
"github.com/ollama/ollama/llama"
|
||||||
"github.com/ollama/ollama/logutil"
|
"github.com/ollama/ollama/logutil"
|
||||||
"github.com/ollama/ollama/ml"
|
"github.com/ollama/ollama/ml"
|
||||||
@@ -1348,9 +1347,9 @@ type CompletionRequest struct {
|
|||||||
Images []ImageData
|
Images []ImageData
|
||||||
Options *api.Options
|
Options *api.Options
|
||||||
|
|
||||||
Grammar string // set before sending the request to the subprocess
|
Grammar string // set before sending the request to the subprocess
|
||||||
FunctionNameMap *harmony.FunctionNameMap
|
UseHarmony bool
|
||||||
PrefillContent *bool
|
PrefillContent *bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// DoneReason represents the reason why a completion response is done
|
// DoneReason represents the reason why a completion response is done
|
||||||
|
|||||||
@@ -776,9 +776,8 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) {
|
|||||||
|
|
||||||
var harmonyMessageHandler *harmony.HarmonyMessageHandler
|
var harmonyMessageHandler *harmony.HarmonyMessageHandler
|
||||||
var harmonyToolParser *harmony.HarmonyToolCallAccumulator
|
var harmonyToolParser *harmony.HarmonyToolCallAccumulator
|
||||||
if req.FunctionNameMap != nil {
|
if req.UseHarmony {
|
||||||
harmonyMessageHandler = harmony.NewHarmonyMessageHandler()
|
harmonyMessageHandler = harmony.NewHarmonyMessageHandler()
|
||||||
harmonyMessageHandler.FunctionNameMap = req.FunctionNameMap
|
|
||||||
harmonyMessageHandler.HarmonyParser.AddImplicitStartOrPrefill(req.PrefillContent)
|
harmonyMessageHandler.HarmonyParser.AddImplicitStartOrPrefill(req.PrefillContent)
|
||||||
harmonyToolParser = harmonyMessageHandler.CreateToolParser()
|
harmonyToolParser = harmonyMessageHandler.CreateToolParser()
|
||||||
}
|
}
|
||||||
@@ -893,10 +892,10 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) {
|
|||||||
} else {
|
} else {
|
||||||
var toolCalls []api.ToolCall
|
var toolCalls []api.ToolCall
|
||||||
if harmonyMessageHandler != nil {
|
if harmonyMessageHandler != nil {
|
||||||
|
// these tools still need to be transformed to the original function name
|
||||||
toolName, toolContent := harmonyToolParser.Drain()
|
toolName, toolContent := harmonyToolParser.Drain()
|
||||||
if toolName != nil {
|
if toolName != nil {
|
||||||
*toolName = strings.TrimPrefix(*toolName, "functions.")
|
*toolName = strings.TrimPrefix(*toolName, "functions.")
|
||||||
*toolName = harmonyMessageHandler.FunctionNameMap.OriginalFromConverted(*toolName)
|
|
||||||
var args api.ToolCallFunctionArguments
|
var args api.ToolCallFunctionArguments
|
||||||
if err := json.Unmarshal([]byte(toolContent), &args); err != nil {
|
if err := json.Unmarshal([]byte(toolContent), &args); err != nil {
|
||||||
http.Error(w, fmt.Sprintf("failed to unmarshal tool call function arguments: %v", err), http.StatusInternalServerError)
|
http.Error(w, fmt.Sprintf("failed to unmarshal tool call function arguments: %v", err), http.StatusInternalServerError)
|
||||||
|
|||||||
@@ -196,12 +196,10 @@ func (s *Server) GenerateHandler(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
useHarmony := harmony.ShouldUseHarmony(m.Config.ModelFamily, m.Template) && !req.Raw
|
useHarmony := harmony.ShouldUseHarmony(m.Config.ModelFamily, m.Template) && !req.Raw
|
||||||
var harmonyMessageHandler *harmony.HarmonyMessageHandler
|
var functionNameMap *harmony.FunctionNameMap
|
||||||
var harmonyToolParser *harmony.HarmonyToolCallAccumulator
|
|
||||||
if useHarmony {
|
if useHarmony {
|
||||||
harmonyMessageHandler = harmony.NewHarmonyMessageHandler()
|
functionNameMap = harmony.NewFunctionNameMap()
|
||||||
harmonyMessageHandler.HarmonyParser.AddImplicitStart()
|
|
||||||
harmonyToolParser = harmonyMessageHandler.CreateToolParser()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Validate Think value: string values currently only allowed for gptoss models
|
// Validate Think value: string values currently only allowed for gptoss models
|
||||||
@@ -345,16 +343,19 @@ func (s *Server) GenerateHandler(c *gin.Context) {
|
|||||||
var sb strings.Builder
|
var sb strings.Builder
|
||||||
defer close(ch)
|
defer close(ch)
|
||||||
if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
|
if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
|
||||||
Prompt: prompt,
|
Prompt: prompt,
|
||||||
Images: images,
|
Images: images,
|
||||||
Format: req.Format,
|
Format: req.Format,
|
||||||
Options: opts,
|
Options: opts,
|
||||||
|
UseHarmony: useHarmony,
|
||||||
}, func(cr llm.CompletionResponse) {
|
}, func(cr llm.CompletionResponse) {
|
||||||
res := api.GenerateResponse{
|
res := api.GenerateResponse{
|
||||||
Model: req.Model,
|
Model: req.Model,
|
||||||
CreatedAt: time.Now().UTC(),
|
CreatedAt: time.Now().UTC(),
|
||||||
Response: cr.Content,
|
Response: cr.Content,
|
||||||
Done: cr.Done,
|
Done: cr.Done,
|
||||||
|
Thinking: cr.Thinking,
|
||||||
|
ToolCalls: cr.ToolCalls,
|
||||||
Metrics: api.Metrics{
|
Metrics: api.Metrics{
|
||||||
PromptEvalCount: cr.PromptEvalCount,
|
PromptEvalCount: cr.PromptEvalCount,
|
||||||
PromptEvalDuration: cr.PromptEvalDuration,
|
PromptEvalDuration: cr.PromptEvalDuration,
|
||||||
@@ -363,12 +364,22 @@ func (s *Server) GenerateHandler(c *gin.Context) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if res.Done {
|
||||||
|
res.DoneReason = cr.DoneReason.String()
|
||||||
|
res.TotalDuration = time.Since(checkpointStart)
|
||||||
|
res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
|
||||||
|
}
|
||||||
|
|
||||||
if useHarmony {
|
if useHarmony {
|
||||||
content, thinking, toolContent := harmonyMessageHandler.AddContent(cr.Content, harmonyToolParser)
|
for i, tool := range res.ToolCalls {
|
||||||
res.Response = content
|
res.ToolCalls[i].Function.Name = functionNameMap.OriginalFromConverted(tool.Function.Name)
|
||||||
res.Thinking = thinking
|
}
|
||||||
harmonyToolParser.Add(toolContent)
|
if res.Response != "" || res.Thinking != "" || len(res.ToolCalls) > 0 || res.Done {
|
||||||
} else if thinkingState != nil {
|
ch <- res
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if thinkingState != nil {
|
||||||
thinking, content := thinkingState.AddContent(cr.Content)
|
thinking, content := thinkingState.AddContent(cr.Content)
|
||||||
res.Thinking = thinking
|
res.Thinking = thinking
|
||||||
res.Response = content
|
res.Response = content
|
||||||
@@ -379,30 +390,6 @@ func (s *Server) GenerateHandler(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if cr.Done {
|
if cr.Done {
|
||||||
if useHarmony {
|
|
||||||
toolName, toolContent := harmonyToolParser.Drain()
|
|
||||||
if toolName != nil {
|
|
||||||
*toolName = strings.TrimPrefix(*toolName, "functions.")
|
|
||||||
var args api.ToolCallFunctionArguments
|
|
||||||
if err := json.Unmarshal([]byte(toolContent), &args); err != nil {
|
|
||||||
errStr := fmt.Sprintf("error parsing tool call: raw='%s', err=%s", toolContent, err.Error())
|
|
||||||
ch <- gin.H{"error": errStr}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
res.ToolCalls = append(res.ToolCalls, api.ToolCall{
|
|
||||||
Function: api.ToolCallFunction{
|
|
||||||
Name: *toolName,
|
|
||||||
Arguments: args,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
res.DoneReason = cr.DoneReason.String()
|
|
||||||
res.TotalDuration = time.Since(checkpointStart)
|
|
||||||
res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
|
|
||||||
|
|
||||||
if !req.Raw {
|
if !req.Raw {
|
||||||
tokens, err := r.Tokenize(c.Request.Context(), prompt+sb.String())
|
tokens, err := r.Tokenize(c.Request.Context(), prompt+sb.String())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -1686,12 +1673,12 @@ func (s *Server) ChatHandler(c *gin.Context) {
|
|||||||
defer close(ch)
|
defer close(ch)
|
||||||
|
|
||||||
if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
|
if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
|
||||||
Prompt: prompt,
|
Prompt: prompt,
|
||||||
Images: images,
|
Images: images,
|
||||||
Format: req.Format,
|
Format: req.Format,
|
||||||
Options: opts,
|
Options: opts,
|
||||||
FunctionNameMap: functionNameMap,
|
UseHarmony: useHarmony,
|
||||||
PrefillContent: prefillContentOrThinking,
|
PrefillContent: prefillContentOrThinking,
|
||||||
}, func(r llm.CompletionResponse) {
|
}, func(r llm.CompletionResponse) {
|
||||||
res := api.ChatResponse{
|
res := api.ChatResponse{
|
||||||
Model: req.Model,
|
Model: req.Model,
|
||||||
@@ -1713,6 +1700,9 @@ func (s *Server) ChatHandler(c *gin.Context) {
|
|||||||
|
|
||||||
if useHarmony {
|
if useHarmony {
|
||||||
// only send messages with meaningful content (empty messages confuse clients)
|
// only send messages with meaningful content (empty messages confuse clients)
|
||||||
|
for i, tool := range res.Message.ToolCalls {
|
||||||
|
res.Message.ToolCalls[i].Function.Name = functionNameMap.OriginalFromConverted(tool.Function.Name)
|
||||||
|
}
|
||||||
if res.Message.Content != "" || res.Message.Thinking != "" || len(res.Message.ToolCalls) > 0 || res.Done {
|
if res.Message.Content != "" || res.Message.Thinking != "" || len(res.Message.ToolCalls) > 0 || res.Done {
|
||||||
ch <- res
|
ch <- res
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user