config: allow setting context length through env var (#8938)
* envconfig: allow setting context length through env var
commit 314573bfe8 (parent 4604b10306)
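With this change the default context length can be set before starting the server, e.g. OLLAMA_CONTEXT_LENGTH=4096 ollama serve. A minimal sketch of reading the configured value from Go (illustrative only, not part of the commit):

    package main

    import (
        "fmt"

        "github.com/ollama/ollama/envconfig"
    )

    func main() {
        // ContextLength() returns OLLAMA_CONTEXT_LENGTH as a uint,
        // falling back to 2048 when the variable is unset.
        fmt.Println("default context length:", envconfig.ContextLength())
    }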
api/types.go
@@ -10,6 +10,8 @@ import (
 	"strconv"
 	"strings"
 	"time"
+
+	"github.com/ollama/ollama/envconfig"
 )
 
 // StatusError is an error with an HTTP status code and message.
@@ -609,7 +611,7 @@ func DefaultOptions() Options {
 
 		Runner: Runner{
 			// options set when the model is loaded
-			NumCtx:    2048,
+			NumCtx:    int(envconfig.ContextLength()),
 			NumBatch:  512,
 			NumGPU:    -1, // -1 here indicates that NumGPU should be set dynamically
 			NumThread: 0,  // let the runtime decide
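Since DefaultOptions now calls envconfig.ContextLength(), a request that leaves num_ctx unset inherits the env-configured default. A hedged sketch of the effect (the accessor reads the environment at call time, as the TestContextLength cases below rely on):

    package main

    import (
        "fmt"
        "os"

        "github.com/ollama/ollama/api"
    )

    func main() {
        // The accessor re-reads the environment on each call, so setting
        // the variable before DefaultOptions runs is enough.
        os.Setenv("OLLAMA_CONTEXT_LENGTH", "8192")

        opts := api.DefaultOptions()
        fmt.Println(opts.NumCtx) // 8192 rather than the old hard-coded 2048
    }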
envconfig/config.go
@@ -167,6 +167,8 @@ var (
 	MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE")
 	// Enable the new Ollama engine
 	NewEngine = Bool("OLLAMA_NEW_ENGINE")
+	// ContextLength sets the default context length
+	ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 2048)
 )
 
 func String(s string) func() string {
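Uint is a pre-existing envconfig helper; the commit only registers a new variable with it. A minimal sketch of how such a helper plausibly works, assuming it follows the same closure pattern as the other accessors (the parse and fallback details here are assumptions, not the actual implementation):

    package envconfig

    import (
        "os"
        "strconv"
    )

    // Uint returns an accessor that re-reads key from the environment on
    // each call, falling back to defaultValue when the variable is unset
    // or does not parse as an unsigned integer. (Sketch only; the real
    // helper may also log or warn on bad input.)
    func Uint(key string, defaultValue uint) func() uint {
        return func() uint {
            if s := os.Getenv(key); s != "" {
                if n, err := strconv.ParseUint(s, 10, 64); err == nil {
                    return uint(n)
                }
            }
            return defaultValue
        }
    }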
@@ -252,6 +254,7 @@ func AsMap() map[string]EnvVar {
 		"OLLAMA_ORIGINS":         {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
 		"OLLAMA_SCHED_SPREAD":    {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
 		"OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
+		"OLLAMA_CONTEXT_LENGTH":  {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 2048)"},
 		"OLLAMA_NEW_ENGINE":      {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},
 
 		// Informational
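The AsMap entry is what surfaces the new variable in environment-variable listings and diagnostics. A short sketch of enumerating the registered configuration (the Name/Value/Description fields are assumed from the map entries above):

    package main

    import (
        "fmt"

        "github.com/ollama/ollama/envconfig"
    )

    func main() {
        // Print every registered variable with its current value; assumes
        // EnvVar carries Name, Value, and Description as used in AsMap.
        for _, v := range envconfig.AsMap() {
            fmt.Printf("%-24s %-8v %s\n", v.Name, v.Value, v.Description)
        }
    }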
envconfig/config_test.go
@@ -272,3 +272,19 @@ func TestVar(t *testing.T) {
 		})
 	}
 }
+
+func TestContextLength(t *testing.T) {
+	cases := map[string]uint{
+		"":     2048,
+		"4096": 4096,
+	}
+
+	for k, v := range cases {
+		t.Run(k, func(t *testing.T) {
+			t.Setenv("OLLAMA_CONTEXT_LENGTH", k)
+			if i := ContextLength(); i != v {
+				t.Errorf("%s: expected %d, got %d", k, v, i)
+			}
+		})
+	}
+}
llm/memory_test.go
@@ -17,6 +17,7 @@ import (
 func TestEstimateGPULayers(t *testing.T) {
 	t.Setenv("OLLAMA_DEBUG", "1")
 	t.Setenv("OLLAMA_KV_CACHE_TYPE", "") // Ensure default f16
+	t.Setenv("OLLAMA_CONTEXT_LENGTH", "2048")
 
 	modelName := "dummy"
 	f, err := os.CreateTemp(t.TempDir(), modelName)
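Pinning OLLAMA_CONTEXT_LENGTH keeps this memory-estimate test deterministic: KV-cache size grows linearly with context length, so a value inherited from the developer's shell would shift the expected GPU layer counts. A back-of-the-envelope sketch of that dependence (the dimensions and the simple no-GQA f16 formula are illustrative assumptions, not taken from this commit):

    package main

    import "fmt"

    // kvCacheBytes estimates an f16 KV cache: keys and values (factor 2)
    // for every layer, position, and embedding dimension, 2 bytes each.
    func kvCacheBytes(nLayers, nCtx, nEmbd int) int {
        return 2 * nLayers * nCtx * nEmbd * 2
    }

    func main() {
        // Illustrative 7B-class shape: 32 layers, 4096-dim embeddings.
        fmt.Println(kvCacheBytes(32, 2048, 4096)>>20, "MiB at num_ctx=2048") // 1024 MiB
        fmt.Println(kvCacheBytes(32, 8192, 4096)>>20, "MiB at num_ctx=8192") // 4096 MiB
    }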