From fbe6ae285a23baddb14c5bbce26d4fcb837503e4 Mon Sep 17 00:00:00 2001
From: Bruce MacDonald <brucewmacdonald@gmail.com>
Date: Thu, 22 May 2025 10:48:08 -0700
Subject: [PATCH] server: improve tensor quantization fallback logic (#10806)

Fall back to alternative quantization types when a tensor's dimensions aren't divisible by the block size required for the original desired quantization type. If retried quantization types fail, the system ultimately falls back to F16 (half-precision floating point) which has a block size of 1 and can handle any tensor dimension.
---
 server/quantization.go | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/server/quantization.go b/server/quantization.go
index adfc948ecf..e57e8a4dad 100644
--- a/server/quantization.go
+++ b/server/quantization.go
@@ -120,14 +120,30 @@ func getTensorNewType(kv fsggml.KV, qs *quantizeState, newType fsggml.TensorType
 
 	if newType.IsQuantized() {
 		nx := shape[0]
-		ny := uint64(1)
-		if len(shape) > 1 {
-			ny = shape[1]
-		}
 		qk_k := newType.BlockSize()
+
+		// Check if first dimension is divisible by block size
 		if nx%qk_k != 0 {
-			slog.Warn(fmt.Sprintf("tensor cols %d x %d are not divisible by %d, required for %s.  Falling back to quantization %s", nx, ny, qk_k, newType.String(), fsggml.TensorTypeF16.String()))
-			newType = fsggml.TensorTypeF16
+			// Store the original type for logging
+			originalType := newType
+
+			// Select appropriate fallback based on original type
+			switch newType {
+			case fsggml.TensorTypeQ4_K:
+				newType = fsggml.TensorTypeQ5_0
+			case fsggml.TensorTypeQ5_K:
+				newType = fsggml.TensorTypeQ5_1
+			case fsggml.TensorTypeQ6_K:
+				newType = fsggml.TensorTypeQ8_0
+			}
+
+			// Final check - if still incompatible, fall back to F16
+			if nx%newType.BlockSize() != 0 {
+				newType = fsggml.TensorTypeF16
+			}
+
+			slog.Warn(fmt.Sprintf("tensor cols %d are not divisible by %d, required for %s - using fallback quantization %s",
+				nx, qk_k, originalType.String(), newType.String()))
 		}
 	}
 	return newType