From 8e8f2c6d67ff3ee758102f197bb1b347ed1e97bd Mon Sep 17 00:00:00 2001
From: Jesse Gross
Date: Thu, 1 May 2025 11:34:02 -0700
Subject: [PATCH] ollamarunner: Fix memory leak when processing images

The context (and therefore associated input tensors) was not being
properly closed when images were being processed. We were trying to
close them but in reality we were closing over an empty list,
preventing anything from actually being freed.

Fixes #10434
---
 runner/ollamarunner/runner.go | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/runner/ollamarunner/runner.go b/runner/ollamarunner/runner.go
index 7ca6dc8c1..9597f7cdd 100644
--- a/runner/ollamarunner/runner.go
+++ b/runner/ollamarunner/runner.go
@@ -34,14 +34,10 @@ import (
 	_ "github.com/ollama/ollama/model/models"
 )
 
-type contextList struct {
-	list []ml.Context
-}
-
 type Sequence struct {
 	// ctxs are used for allocating tensors that last the lifetime of the sequence, such as
 	// multimodal embeddings
-	ctxs *contextList
+	ctxs []ml.Context
 
 	// batch index
 	iBatch int
@@ -177,8 +173,10 @@ func (s *Server) NewSequence(prompt string, images []llm.ImageData, params NewSe
 // inputs processes the prompt and images into a list of inputs
 // by splitting the prompt on [img-] tags, tokenizing text and
 // decoding images
-func (s *Server) inputs(prompt string, images []llm.ImageData) ([]input.Input, *contextList, error) {
+func (s *Server) inputs(prompt string, images []llm.ImageData) ([]input.Input, []ml.Context, error) {
 	var inputs []input.Input
+	var ctxs []ml.Context
+
 	var parts []string
 	var matches [][]string
 
@@ -192,13 +190,6 @@ func (s *Server) inputs(prompt string, images []llm.ImageData) ([]input.Input, *
 		parts = []string{prompt}
 	}
 
-	var contexts contextList
-	runtime.AddCleanup(&contexts, func(ctxs []ml.Context) {
-		for _, ctx := range ctxs {
-			ctx.Close()
-		}
-	}, contexts.list)
-
 	postTokenize := false
 	for i, part := range parts {
 		// text - tokenize
@@ -228,7 +219,8 @@ func (s *Server) inputs(prompt string, images []llm.ImageData) ([]input.Input, *
 			}
 
 			ctx := s.model.Backend().NewContext()
-			contexts.list = append(contexts.list, ctx)
+			runtime.SetFinalizer(ctx, func(c ml.Context) { c.Close() })
+			ctxs = append(ctxs, ctx)
 			imageEmbeddings, err := multimodalProcessor.EncodeMultimodal(ctx, images[imageIndex].Data)
 			if err != nil {
 				return nil, nil, err
@@ -251,7 +243,7 @@ func (s *Server) inputs(prompt string, images []llm.ImageData) ([]input.Input, *
 		}
 	}
 
-	return inputs, &contexts, nil
+	return inputs, ctxs, nil
 }
 
 type Server struct {
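
For reference, a minimal standalone sketch of the failure mode described in the
commit message: runtime.AddCleanup evaluates its argument at registration time,
so registering the cleanup against a slice that is still empty means contexts
appended afterwards are never seen by the cleanup and nothing gets closed. This
sketch is not part of the patch; fakeContext and wrapper are hypothetical
stand-ins for ml.Context and the removed contextList, and it assumes Go 1.24+
for runtime.AddCleanup.

package main

import (
	"fmt"
	"runtime"
	"time"
)

// fakeContext stands in for ml.Context in this sketch.
type fakeContext struct{ id int }

func (c *fakeContext) Close() { fmt.Println("closed", c.id) }

// wrapper mirrors the removed contextList type.
type wrapper struct{ list []*fakeContext }

func main() {
	w := &wrapper{}

	// Registered while w.list is still nil: the cleanup receives a copy of
	// the empty slice header, mirroring the leak in the removed code.
	runtime.AddCleanup(w, func(list []*fakeContext) {
		for _, c := range list {
			c.Close()
		}
	}, w.list)

	// Contexts appended after registration are invisible to the cleanup,
	// so they are never closed.
	w.list = append(w.list, &fakeContext{id: 1}, &fakeContext{id: 2})

	w = nil
	runtime.GC()
	time.Sleep(100 * time.Millisecond) // cleanup iterates an empty list: prints nothing
}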