mirror of
https://github.com/ollama/ollama.git
synced 2025-11-11 01:37:30 +01:00
The quantization PR didn't block all unsupported file types; this PR fixes that. It also updates the API docs to reflect the now-reduced set of supported types.
This commit is contained in:
@@ -42,71 +42,6 @@ func TestGetTensorNewType(t *testing.T) {
|
||||
ftype: fsggml.FileTypeF32,
|
||||
expected: fsggml.TensorTypeQ6_K,
|
||||
},
|
||||
{
|
||||
name: "attn_v.weight_q4_k",
|
||||
kv: map[string]any{
|
||||
"general.architecture": "foo",
|
||||
"foo.attention.head_count": uint32(4),
|
||||
"foo.attention.head_count_kv": uint32(1),
|
||||
},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "blk.0.attn_v.weight",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ2_K,
|
||||
expected: fsggml.TensorTypeQ4_K,
|
||||
},
|
||||
{
|
||||
name: "attn_v.weight_q3_k",
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "blk.0.attn_v.weight",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ2_K,
|
||||
expected: fsggml.TensorTypeQ3_K,
|
||||
},
|
||||
{
|
||||
name: "attn_v.weight_q2_k_s_q4_k",
|
||||
kv: map[string]any{
|
||||
"general.architecture": "foo",
|
||||
"foo.attention.head_count": uint32(4),
|
||||
"foo.attention.head_count_kv": uint32(1),
|
||||
},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "blk.0.attn_v.weight",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ2_K_S,
|
||||
expected: fsggml.TensorTypeQ4_K,
|
||||
},
|
||||
{
|
||||
name: "attn_v.weight_q3_k_m",
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "blk.0.attn_v.weight",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ3_K_M,
|
||||
expected: fsggml.TensorTypeQ5_K,
|
||||
},
|
||||
{
|
||||
name: "attn_v.weight_q3_k_m_i",
|
||||
qs: quantizeState{
|
||||
iAttnV: 2,
|
||||
},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "blk.0.attn_v.weight",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ3_K_M,
|
||||
expected: fsggml.TensorTypeQ4_K,
|
||||
},
|
||||
{
|
||||
name: "attn_v.weight_q3_k_l",
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "blk.0.attn_v.weight",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ3_K_L,
|
||||
expected: fsggml.TensorTypeQ5_K,
|
||||
},
|
||||
{
|
||||
name: "attn_v.weight_q4_k_m",
|
||||
qs: quantizeState{
|
||||
@@ -156,88 +91,6 @@ func TestGetTensorNewType(t *testing.T) {
|
||||
ftype: fsggml.FileTypeF32,
|
||||
expected: fsggml.TensorTypeQ8_0,
|
||||
},
|
||||
{
|
||||
name: "ffn_down_q2_k",
|
||||
qs: quantizeState{},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "ffn_down",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ2_K,
|
||||
expected: fsggml.TensorTypeQ3_K,
|
||||
},
|
||||
{
|
||||
name: "ffn_down_q2_k_s",
|
||||
qs: quantizeState{},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "ffn_down",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ2_K_S,
|
||||
expected: fsggml.TensorTypeQ4_0,
|
||||
},
|
||||
{
|
||||
name: "ffn_down_q2_k_s_layers",
|
||||
qs: quantizeState{
|
||||
iFfnDown: 2,
|
||||
nFfnDown: 3 * 8,
|
||||
},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "ffn_down",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ2_K_S,
|
||||
expected: fsggml.TensorTypeQ4_K,
|
||||
},
|
||||
{
|
||||
name: "ffn_down_q3_k_m_base",
|
||||
qs: quantizeState{
|
||||
iFfnDown: 1,
|
||||
nFfnDown: 8,
|
||||
},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "ffn_down",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ3_K_M,
|
||||
expected: fsggml.TensorTypeQ3_K,
|
||||
},
|
||||
{
|
||||
name: "ffn_down_q3_k_m_16",
|
||||
qs: quantizeState{
|
||||
iFfnDown: 2,
|
||||
nFfnDown: 3 * 16,
|
||||
},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "ffn_down",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ3_K_M,
|
||||
expected: fsggml.TensorTypeQ5_K,
|
||||
},
|
||||
{
|
||||
name: "ffn_down_q3_k_m_8",
|
||||
qs: quantizeState{
|
||||
iFfnDown: 2,
|
||||
nFfnDown: 3 * 8,
|
||||
},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "ffn_down",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ3_K_M,
|
||||
expected: fsggml.TensorTypeQ4_K,
|
||||
},
|
||||
{
|
||||
name: "ffn_down_q3_k_l",
|
||||
qs: quantizeState{},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "ffn_down",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ3_K_L,
|
||||
expected: fsggml.TensorTypeQ5_K,
|
||||
},
|
||||
{
|
||||
name: "ffn_down_q4_k_m",
|
||||
qs: quantizeState{
|
||||
@@ -264,19 +117,6 @@ func TestGetTensorNewType(t *testing.T) {
|
||||
ftype: fsggml.FileTypeQ4_K_M,
|
||||
expected: fsggml.TensorTypeQ6_K,
|
||||
},
|
||||
{
|
||||
name: "ffn_down_q5_k_m",
|
||||
qs: quantizeState{
|
||||
iFfnDown: 2,
|
||||
nFfnDown: 3 * 8,
|
||||
},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "ffn_down",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ5_K_M,
|
||||
expected: fsggml.TensorTypeQ6_K,
|
||||
},
|
||||
{
|
||||
name: "ffn_down_q4_k_s",
|
||||
qs: quantizeState{
|
||||
@@ -290,59 +130,6 @@ func TestGetTensorNewType(t *testing.T) {
|
||||
ftype: fsggml.FileTypeQ4_K_S,
|
||||
expected: fsggml.TensorTypeQ5_K,
|
||||
},
|
||||
{
|
||||
name: "attn_output.weight_8_expert",
|
||||
qs: quantizeState{},
|
||||
kv: map[string]any{
|
||||
"general.architecture": "foo",
|
||||
"foo.expert_count": uint32(8),
|
||||
},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "blk.0.attn_output.weight",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ2_K,
|
||||
expected: fsggml.TensorTypeQ5_K,
|
||||
},
|
||||
{
|
||||
name: "attn_output.weight_q2",
|
||||
qs: quantizeState{},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "blk.0.attn_output.weight",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ2_K,
|
||||
expected: fsggml.TensorTypeQ3_K,
|
||||
},
|
||||
{
|
||||
name: "attn_output.weight_q3_k_m",
|
||||
qs: quantizeState{},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "blk.0.attn_output.weight",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ3_K_M,
|
||||
expected: fsggml.TensorTypeQ4_K,
|
||||
},
|
||||
{
|
||||
name: "attn_output.weight_q3_k_l",
|
||||
qs: quantizeState{},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "blk.0.attn_output.weight",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ3_K_L,
|
||||
expected: fsggml.TensorTypeQ5_K,
|
||||
},
|
||||
{
|
||||
name: "attn_qkv.weight_q3_k_m",
|
||||
qs: quantizeState{},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "blk.0.attn_qkv.weight",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ3_K_M,
|
||||
expected: fsggml.TensorTypeQ4_K,
|
||||
},
|
||||
{
|
||||
name: "attn_qkv.weight_q4_k_m",
|
||||
qs: quantizeState{},
|
||||
@@ -353,16 +140,6 @@ func TestGetTensorNewType(t *testing.T) {
|
||||
ftype: fsggml.FileTypeQ4_K_M,
|
||||
expected: fsggml.TensorTypeQ5_K,
|
||||
},
|
||||
{
|
||||
name: "attn_qkv.weight_q5_k_m",
|
||||
qs: quantizeState{},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "blk.0.attn_qkv.weight",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ5_K_M,
|
||||
expected: fsggml.TensorTypeQ6_K,
|
||||
},
|
||||
}
|
||||
for _, tt := range cases {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
|
||||
Reference in New Issue
Block a user