Mirror of https://github.com/ollama/ollama.git
Merge pull request #167 from jmorganca/decode-ggml
partial decode ggml bin for more info
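The title refers to reading just enough of a GGML model file — the container header — to learn its format and version before committing to a full load. As a rough sketch of the idea only, not the code this PR adds (the constants are the publicly known GGML container magics, read as a little-endian uint32 at offset 0):

```go
package main

import (
	"encoding/binary"
	"fmt"
	"os"
)

// Publicly known GGML container magics (little-endian uint32 at offset 0).
const (
	magicGGML = 0x67676d6c // "ggml": unversioned container
	magicGGMF = 0x67676d66 // "ggmf": versioned container
	magicGGJT = 0x67676a74 // "ggjt": versioned, mmap-friendly container
)

// peekGGML reads only the file header: enough to identify the container
// without decoding any tensor data.
func peekGGML(path string) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()

	var magic uint32
	if err := binary.Read(f, binary.LittleEndian, &magic); err != nil {
		return err
	}

	switch magic {
	case magicGGML:
		fmt.Println("ggml, unversioned")
	case magicGGMF, magicGGJT:
		// Versioned containers carry a uint32 version right after the magic.
		var version uint32
		if err := binary.Read(f, binary.LittleEndian, &version); err != nil {
			return err
		}
		fmt.Printf("magic %#x, version %d\n", magic, version)
	default:
		return fmt.Errorf("unknown magic %#x", magic)
	}
	return nil
}

func main() {
	if len(os.Args) != 2 {
		fmt.Fprintln(os.Stderr, "usage: peekggml <model.bin>")
		os.Exit(1)
	}
	if err := peekGGML(os.Args[1]); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
```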
```diff
@@ -21,14 +21,14 @@ import (
 	"gonum.org/v1/gonum/mat"
 
 	"github.com/jmorganca/ollama/api"
-	"github.com/jmorganca/ollama/llama"
+	"github.com/jmorganca/ollama/llm"
 	"github.com/jmorganca/ollama/vector"
 )
 
 var loaded struct {
 	mu sync.Mutex
 
-	llm *llama.LLM
+	llm llm.LLM
 	Embeddings []vector.Embedding
 
 	expireAt time.Time
```
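The cache now holds an `llm.LLM` value rather than a concrete `*llama.LLM`, so the server can hold any backend that satisfies the interface. The interface's full method set is not part of this diff; the sketch below is a hypothetical reconstruction from the two calls that do appear in it (`Encode` and `SetOptions`) — anything beyond those is an assumption:

```go
// Hypothetical reconstruction, for illustration only: Encode and SetOptions
// are the only methods visible in this diff; whatever else the real
// interface declares (Predict, Close, ...) is not shown here.
package llm

// Options stands in for api.Options from the ollama codebase.
type Options struct {
	NumKeep int
	// ... remaining generation options elided
}

type LLM interface {
	// Encode tokenizes a prompt with the loaded model's vocabulary.
	Encode(prompt string) []int
	// SetOptions applies updated runtime options to an already loaded model.
	SetOptions(opts Options)
}
```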
```diff
@@ -63,11 +63,16 @@ func load(model *Model, reqOpts map[string]interface{}, sessionDuration time.Dur
 			loaded.Embeddings = model.Embeddings
 		}
 
-		llm, err := llama.New(model.ModelPath, opts)
+		llmModel, err := llm.New(model.ModelPath, opts)
 		if err != nil {
 			return err
 		}
 
+		// set cache values before modifying opts
+		loaded.llm = llmModel
+		loaded.digest = model.Digest
+		loaded.options = opts
+
 		if opts.NumKeep < 0 {
 			promptWithSystem, err := model.Prompt(api.GenerateRequest{}, "")
 			if err != nil {
```
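Two details in this hunk are easy to miss. First, the local variable becomes `llmModel` because the new import makes `llm` a package name: a local called `llm` would shadow the package and leave `llm.New` unreachable for the rest of the function. A minimal demonstration of that pitfall, using `fmt` in place of `llm` (illustration only, not from the PR):

```go
package main

import "fmt"

// Shows the shadowing pitfall the llm -> llmModel rename avoids: a local
// variable that reuses a package name hides the package for the rest of
// the scope.
func main() {
	fmt.Println("package fmt is still visible here")

	fmt := "now a plain string" // shadows the fmt package from here on
	_ = fmt
	// fmt.Println("gone")      // would not compile: fmt is a string here
}
```

Second, the `set cache values before modifying opts` comment appears to be the point of the reordering: `loaded.options` is recorded before the NumKeep branch mutates `opts`, so a later comparison against `loaded.options` reflects what the caller requested rather than the derived values.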
```diff
@@ -79,15 +84,13 @@ func load(model *Model, reqOpts map[string]interface{}, sessionDuration time.Dur
 				return err
 			}
 
-			tokensWithSystem := llm.Encode(promptWithSystem)
-			tokensNoSystem := llm.Encode(promptNoSystem)
+			tokensWithSystem := llmModel.Encode(promptWithSystem)
+			tokensNoSystem := llmModel.Encode(promptNoSystem)
 
-			llm.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
+			opts.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
+
+			llmModel.SetOptions(opts)
 		}
-
-		loaded.llm = llm
-		loaded.digest = model.Digest
-		loaded.options = opts
 	}
 
 	loaded.expireAt = time.Now().Add(sessionDuration)
```
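The NumKeep logic is worth spelling out: the server renders the prompt template twice, once with the system prompt and once without, tokenizes both with the model, and takes the difference in token counts as the number of leading tokens to preserve when the context window fills up (the `+ 1` plausibly accounts for a leading BOS token, though the diff itself doesn't say). The change also moves the result from a field on the old `llama.LLM` struct into `opts`, applied via `SetOptions`. A self-contained sketch of the arithmetic, with a whitespace splitter standing in for the model's real `Encode`:

```go
package main

import (
	"fmt"
	"strings"
)

// encode is a stand-in tokenizer for illustration; the real computation
// uses the loaded model's Encode.
func encode(s string) []string {
	return strings.Fields(s)
}

func main() {
	// The same template rendered with and without the system prompt.
	promptWithSystem := "You are a helpful assistant. USER: PROMPT:"
	promptNoSystem := "USER: PROMPT:"

	tokensWithSystem := encode(promptWithSystem)
	tokensNoSystem := encode(promptNoSystem)

	// Every token the system prompt contributes, plus one (plausibly the
	// BOS token), is kept at the head of the context during truncation.
	numKeep := len(tokensWithSystem) - len(tokensNoSystem) + 1
	fmt.Println("NumKeep =", numKeep) // NumKeep = 6
}
```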