Follow up to #10363 (#10647)

The quantization PR didn't block all unsupported file types, which this PR fixes. It also updates the API docs to reflect the now-reduced set of supported types.
2025-11-11 01:37:30 +01:00 · 2025-05-12 15:23:31 -07:00
parent 0cefd46f23
commit 9d6df90805
4 changed files with 88 additions and 382 deletions
--- a/server/quantization_test.go
+++ b/server/quantization_test.go
@@ -42,71 +42,6 @@ func TestGetTensorNewType(t *testing.T) {
 			ftype:       fsggml.FileTypeF32,
 			expected:    fsggml.TensorTypeQ6_K,
 		},
-		{
-			name: "attn_v.weight_q4_k",
-			kv: map[string]any{
-				"general.architecture":        "foo",
-				"foo.attention.head_count":    uint32(4),
-				"foo.attention.head_count_kv": uint32(1),
-			},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_v.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ2_K,
-			expected:    fsggml.TensorTypeQ4_K,
-		},
-		{
-			name:        "attn_v.weight_q3_k",
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_v.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ2_K,
-			expected:    fsggml.TensorTypeQ3_K,
-		},
-		{
-			name: "attn_v.weight_q2_k_s_q4_k",
-			kv: map[string]any{
-				"general.architecture":        "foo",
-				"foo.attention.head_count":    uint32(4),
-				"foo.attention.head_count_kv": uint32(1),
-			},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_v.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ2_K_S,
-			expected:    fsggml.TensorTypeQ4_K,
-		},
-		{
-			name:        "attn_v.weight_q3_k_m",
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_v.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_M,
-			expected:    fsggml.TensorTypeQ5_K,
-		},
-		{
-			name: "attn_v.weight_q3_k_m_i",
-			qs: quantizeState{
-				iAttnV: 2,
-			},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_v.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_M,
-			expected:    fsggml.TensorTypeQ4_K,
-		},
-		{
-			name:        "attn_v.weight_q3_k_l",
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_v.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_L,
-			expected:    fsggml.TensorTypeQ5_K,
-		},
 		{
 			name: "attn_v.weight_q4_k_m",
 			qs: quantizeState{
@@ -156,88 +91,6 @@ func TestGetTensorNewType(t *testing.T) {
 			ftype:       fsggml.FileTypeF32,
 			expected:    fsggml.TensorTypeQ8_0,
 		},
-		{
-			name:        "ffn_down_q2_k",
-			qs:          quantizeState{},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "ffn_down",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ2_K,
-			expected:    fsggml.TensorTypeQ3_K,
-		},
-		{
-			name:        "ffn_down_q2_k_s",
-			qs:          quantizeState{},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "ffn_down",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ2_K_S,
-			expected:    fsggml.TensorTypeQ4_0,
-		},
-		{
-			name: "ffn_down_q2_k_s_layers",
-			qs: quantizeState{
-				iFfnDown: 2,
-				nFfnDown: 3 * 8,
-			},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "ffn_down",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ2_K_S,
-			expected:    fsggml.TensorTypeQ4_K,
-		},
-		{
-			name: "ffn_down_q3_k_m_base",
-			qs: quantizeState{
-				iFfnDown: 1,
-				nFfnDown: 8,
-			},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "ffn_down",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_M,
-			expected:    fsggml.TensorTypeQ3_K,
-		},
-		{
-			name: "ffn_down_q3_k_m_16",
-			qs: quantizeState{
-				iFfnDown: 2,
-				nFfnDown: 3 * 16,
-			},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "ffn_down",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_M,
-			expected:    fsggml.TensorTypeQ5_K,
-		},
-		{
-			name: "ffn_down_q3_k_m_8",
-			qs: quantizeState{
-				iFfnDown: 2,
-				nFfnDown: 3 * 8,
-			},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "ffn_down",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_M,
-			expected:    fsggml.TensorTypeQ4_K,
-		},
-		{
-			name:        "ffn_down_q3_k_l",
-			qs:          quantizeState{},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "ffn_down",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_L,
-			expected:    fsggml.TensorTypeQ5_K,
-		},
 		{
 			name: "ffn_down_q4_k_m",
 			qs: quantizeState{
@@ -264,19 +117,6 @@ func TestGetTensorNewType(t *testing.T) {
 			ftype:       fsggml.FileTypeQ4_K_M,
 			expected:    fsggml.TensorTypeQ6_K,
 		},
-		{
-			name: "ffn_down_q5_k_m",
-			qs: quantizeState{
-				iFfnDown: 2,
-				nFfnDown: 3 * 8,
-			},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "ffn_down",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ5_K_M,
-			expected:    fsggml.TensorTypeQ6_K,
-		},
 		{
 			name: "ffn_down_q4_k_s",
 			qs: quantizeState{
@@ -290,59 +130,6 @@ func TestGetTensorNewType(t *testing.T) {
 			ftype:       fsggml.FileTypeQ4_K_S,
 			expected:    fsggml.TensorTypeQ5_K,
 		},
-		{
-			name: "attn_output.weight_8_expert",
-			qs:   quantizeState{},
-			kv: map[string]any{
-				"general.architecture": "foo",
-				"foo.expert_count":     uint32(8),
-			},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_output.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ2_K,
-			expected:    fsggml.TensorTypeQ5_K,
-		},
-		{
-			name:        "attn_output.weight_q2",
-			qs:          quantizeState{},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_output.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ2_K,
-			expected:    fsggml.TensorTypeQ3_K,
-		},
-		{
-			name:        "attn_output.weight_q3_k_m",
-			qs:          quantizeState{},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_output.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_M,
-			expected:    fsggml.TensorTypeQ4_K,
-		},
-		{
-			name:        "attn_output.weight_q3_k_l",
-			qs:          quantizeState{},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_output.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_L,
-			expected:    fsggml.TensorTypeQ5_K,
-		},
-		{
-			name:        "attn_qkv.weight_q3_k_m",
-			qs:          quantizeState{},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_qkv.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_M,
-			expected:    fsggml.TensorTypeQ4_K,
-		},
 		{
 			name:        "attn_qkv.weight_q4_k_m",
 			qs:          quantizeState{},
@@ -353,16 +140,6 @@ func TestGetTensorNewType(t *testing.T) {
 			ftype:       fsggml.FileTypeQ4_K_M,
 			expected:    fsggml.TensorTypeQ5_K,
 		},
-		{
-			name:        "attn_qkv.weight_q5_k_m",
-			qs:          quantizeState{},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_qkv.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ5_K_M,
-			expected:    fsggml.TensorTypeQ6_K,
-		},
 	}
 	for _, tt := range cases {
 		t.Run(tt.name, func(t *testing.T) {