llm: introduce k/v context quantization (vRAM improvements) (#6279)
@@ -15,6 +15,7 @@ import (
 
 func TestEstimateGPULayers(t *testing.T) {
 	t.Setenv("OLLAMA_DEBUG", "1")
+	t.Setenv("OLLAMA_KV_CACHE_TYPE", "") // Ensure default f16
 
 	modelName := "dummy"
 	f, err := os.CreateTemp(t.TempDir(), modelName)
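For context, below is a minimal sketch of the fallback behaviour the added t.Setenv line pins down: when OLLAMA_KV_CACHE_TYPE is unset or empty, the K/V cache type defaults to f16, while a non-empty value (for example q8_0 or q4_0) selects a quantized cache. This is illustrative only; the helper name kvCacheType and its placement are assumptions, not the project's actual code.

package main

import (
	"fmt"
	"os"
)

// kvCacheType resolves the K/V cache type from the environment, falling back
// to the unquantized f16 default when OLLAMA_KV_CACHE_TYPE is unset or empty.
// The helper name is hypothetical and exists only for illustration.
func kvCacheType() string {
	if v := os.Getenv("OLLAMA_KV_CACHE_TYPE"); v != "" {
		return v
	}
	return "f16"
}

func main() {
	fmt.Println("k/v cache type:", kvCacheType())
}

Running this with OLLAMA_KV_CACHE_TYPE=q8_0 prints the quantized type, while leaving the variable unset or empty reproduces the f16 default that the test above relies on.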