Merge pull request #167 from jmorganca/decode-ggml

partial decode ggml bin for more info
This commit is contained in:
Michael Yang
2023-08-10 17:22:40 -07:00
committed by GitHub
26 changed files with 336 additions and 69 deletions

View File

@@ -21,14 +21,14 @@ import (
"gonum.org/v1/gonum/mat"
"github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/llama"
"github.com/jmorganca/ollama/llm"
"github.com/jmorganca/ollama/vector"
)
var loaded struct {
mu sync.Mutex
llm *llama.LLM
llm llm.LLM
Embeddings []vector.Embedding
expireAt time.Time
@@ -63,11 +63,16 @@ func load(model *Model, reqOpts map[string]interface{}, sessionDuration time.Dur
loaded.Embeddings = model.Embeddings
}
llm, err := llama.New(model.ModelPath, opts)
llmModel, err := llm.New(model.ModelPath, opts)
if err != nil {
return err
}
// set cache values before modifying opts
loaded.llm = llmModel
loaded.digest = model.Digest
loaded.options = opts
if opts.NumKeep < 0 {
promptWithSystem, err := model.Prompt(api.GenerateRequest{}, "")
if err != nil {
@@ -79,15 +84,13 @@ func load(model *Model, reqOpts map[string]interface{}, sessionDuration time.Dur
return err
}
tokensWithSystem := llm.Encode(promptWithSystem)
tokensNoSystem := llm.Encode(promptNoSystem)
tokensWithSystem := llmModel.Encode(promptWithSystem)
tokensNoSystem := llmModel.Encode(promptNoSystem)
llm.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
opts.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
llmModel.SetOptions(opts)
}
loaded.llm = llm
loaded.digest = model.Digest
loaded.options = opts
}
loaded.expireAt = time.Now().Add(sessionDuration)