mirror of
https://github.com/ollama/ollama.git
synced 2025-04-07 03:18:24 +02:00
better estimate scratch buffer size
This commit is contained in:
parent
18ddf6d57d
commit
58ce2d8273
@ -62,8 +62,8 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
|
||||
|
||||
// this amount is the overhead + tensors in memory
|
||||
// TODO: get this from llama.cpp's graph calculations instead of
|
||||
// guessing it's ~1/7th of the kv cache times gqa
|
||||
requiredAlloc := int64(ggml.NumGQA()) * requiredKv / 7
|
||||
// estimating it's 1/6 * kv_cache_size * num_gqa
|
||||
requiredAlloc := int64(ggml.NumGQA()) * requiredKv / 6
|
||||
|
||||
requiredTotal := requiredModel + requiredKv + requiredAlloc
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user