update memory calculations

count each layer independently when deciding gpu offloading
This commit is contained in:
Michael Yang
2024-03-18 10:45:22 +01:00
parent d338d70492
commit 91b3e4d282
7 changed files with 125 additions and 89 deletions
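The hunks below only touch the server routes; the per-layer accounting named in the commit message ("count each layer independently when deciding gpu offloading") lives elsewhere in the 7 changed files. As a rough, hypothetical sketch of that idea, sizing each layer on its own and offloading as many whole layers as fit in free VRAM (layerSizes and freeVRAM are illustrative names, not code from this commit):

// countOffloadLayers reports how many whole layers fit on the GPU when each
// layer's memory footprint is counted independently. Hypothetical sketch, not
// the code changed in this commit.
func countOffloadLayers(layerSizes []uint64, freeVRAM uint64) int {
	var used uint64
	n := 0
	for _, size := range layerSizes {
		if used+size > freeVRAM {
			break
		}
		used += size
		n++
	}
	return n
}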

@@ -68,7 +68,7 @@ var loaded struct {
var defaultSessionDuration = 5 * time.Minute
// load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function
-func load(c *gin.Context, model *Model, opts api.Options, sessionDuration time.Duration) error {
+func load(c *gin.Context, model *Model, opts *api.Options, sessionDuration time.Duration) error {
needLoad := loaded.runner == nil || // is there a model loaded?
loaded.ModelPath != model.ModelPath || // has the base model changed?
!reflect.DeepEqual(loaded.AdapterPaths, model.AdapterPaths) || // have the adapters changed?
@@ -97,7 +97,7 @@ func load(c *gin.Context, model *Model, opts api.Options, sessionDuration time.D
loaded.Model = model
loaded.runner = llmRunner
-loaded.Options = &opts
+loaded.Options = opts
}
loaded.expireAt = time.Now().Add(sessionDuration)
@@ -214,7 +214,7 @@ func GenerateHandler(c *gin.Context) {
sessionDuration = req.KeepAlive.Duration
}
-if err := load(c, model, opts, sessionDuration); err != nil {
+if err := load(c, model, &opts, sessionDuration); err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
@@ -460,7 +460,7 @@ func EmbeddingsHandler(c *gin.Context) {
sessionDuration = req.KeepAlive.Duration
}
-if err := load(c, model, opts, sessionDuration); err != nil {
+if err := load(c, model, &opts, sessionDuration); err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
@@ -1267,7 +1267,7 @@ func ChatHandler(c *gin.Context) {
sessionDuration = req.KeepAlive.Duration
}
-if err := load(c, model, opts, sessionDuration); err != nil {
+if err := load(c, model, &opts, sessionDuration); err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
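The hunks above are the mechanical side of the change: load now takes *api.Options, the handlers pass &opts, and loaded.Options stores the caller's options directly instead of the address of a by-value copy. A reduced sketch of the difference between the two, using a stand-in Options type rather than the real api.Options:

package main

import "fmt"

// Options stands in for api.Options in this sketch.
type Options struct{ NumGPU int }

var cached *Options

// loadByValue receives a copy, so &opts points at that copy, not the caller's value.
func loadByValue(opts Options) { cached = &opts }

// loadByPointer stores the caller's value; later changes are visible through the cache.
func loadByPointer(opts *Options) { cached = opts }

func main() {
	opts := Options{NumGPU: 1}

	loadByValue(opts)
	opts.NumGPU = 99
	fmt.Println(cached.NumGPU) // 1: the cache kept a stale copy

	loadByPointer(&opts)
	opts.NumGPU = 2
	fmt.Println(cached.NumGPU) // 2: cache and caller share one value
}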