Move quantization to new backend (#10363)

* Move quantization logic to GGML via new backend. This moves the model-aware logic to Go code and calls GGML's quantization code for model creation.
* Remove "add model quantizations". This is no longer needed now that quantization is implemented directly in Go+GGML code.
2025-11-11 06:39:00 +01:00 · 2025-05-06 11:20:48 -07:00
parent 95e744beeb
commit 424810450f
39 changed files with 1854 additions and 440 deletions
--- a/server/routes_create_test.go
+++ b/server/routes_create_test.go
@@ -24,7 +24,7 @@ import (

 var stream bool = false

-func createBinFile(t *testing.T, kv map[string]any, ti []ggml.Tensor) (string, string) {
+func createBinFile(t *testing.T, kv map[string]any, ti []*ggml.Tensor) (string, string) {
 	t.Helper()
 	t.Setenv("OLLAMA_MODELS", cmp.Or(os.Getenv("OLLAMA_MODELS"), t.TempDir()))