Merge pull request #167 from jmorganca/decode-ggml

Partially decode the ggml bin file to get more info
2025-11-11 20:37:31 +01:00 · 2023-08-10 17:22:40 -07:00
parent 21e6197c0b fccf8d179f
commit 6a6828bddf
26 changed files with 336 additions and 69 deletions
--- a/server/routes.go
+++ b/server/routes.go
@@ -21,14 +21,14 @@ import (
 	"gonum.org/v1/gonum/mat"

 	"github.com/jmorganca/ollama/api"
-	"github.com/jmorganca/ollama/llama"
+	"github.com/jmorganca/ollama/llm"
 	"github.com/jmorganca/ollama/vector"
 )

 var loaded struct {
 	mu sync.Mutex

-	llm        *llama.LLM
+	llm        llm.LLM
 	Embeddings []vector.Embedding

 	expireAt    time.Time
@@ -63,11 +63,16 @@ func load(model *Model, reqOpts map[string]interface{}, sessionDuration time.Dur
 			loaded.Embeddings = model.Embeddings
 		}

-		llm, err := llama.New(model.ModelPath, opts)
+		llmModel, err := llm.New(model.ModelPath, opts)
 		if err != nil {
 			return err
 		}

+		// set cache values before modifying opts
+		loaded.llm = llmModel
+		loaded.digest = model.Digest
+		loaded.options = opts
+
 		if opts.NumKeep < 0 {
 			promptWithSystem, err := model.Prompt(api.GenerateRequest{}, "")
 			if err != nil {
@@ -79,15 +84,13 @@ func load(model *Model, reqOpts map[string]interface{}, sessionDuration time.Dur
 				return err
 			}

-			tokensWithSystem := llm.Encode(promptWithSystem)
-			tokensNoSystem := llm.Encode(promptNoSystem)
+			tokensWithSystem := llmModel.Encode(promptWithSystem)
+			tokensNoSystem := llmModel.Encode(promptNoSystem)

-			llm.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
+			opts.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
+
+			llmModel.SetOptions(opts)
 		}
-
-		loaded.llm = llm
-		loaded.digest = model.Digest
-		loaded.options = opts
 	}
 	loaded.expireAt = time.Now().Add(sessionDuration)