mirror of
https://github.com/ollama/ollama.git
synced 2025-06-29 22:00:50 +02:00
fix: add back bf16 support
BF16 support was accidentally removed when fs/ggml was moved from its previous location.
This commit is contained in:
@ -207,11 +207,26 @@ func (t Tensor) block() (n int) {
|
||||
|
||||
func (t Tensor) blockSize() uint64 {
|
||||
switch t.Kind {
|
||||
case 0, 1, 24, 25, 26, 27, 28, 30: // F32, F16, I8, I16, I32, I64, F64, BF16
|
||||
case
|
||||
0, // F32
|
||||
1, // F16
|
||||
24, // I8
|
||||
25, // I16
|
||||
26, // I32
|
||||
27, // I64
|
||||
28, // F64
|
||||
30: // BF16
|
||||
return 1
|
||||
case 2, 3, 4, 5, 6, 7, 8, 9, 20: // Q4_0, Q4_1, Q5_0, Q5_1, Q8_0, Q8_1, IQ4_NL
|
||||
case
|
||||
2, // Q4_0
|
||||
3, // Q4_1
|
||||
6, // Q5_0
|
||||
7, // Q5_1
|
||||
8, // Q8_0
|
||||
9, // Q8_1
|
||||
20: // IQ4_NL
|
||||
return 32
|
||||
default: // All others
|
||||
default:
|
||||
return 256
|
||||
}
|
||||
}
|
||||
@ -235,7 +250,7 @@ func (t Tensor) typeSize() uint64 {
|
||||
case 8: // Q8_0
|
||||
return 2 + blockSize
|
||||
case 9: // Q8_1
|
||||
return 4 + 4 + blockSize
|
||||
return 2 + 2 + blockSize
|
||||
case 10: // Q2_K
|
||||
return blockSize/16 + blockSize/4 + 2 + 2
|
||||
case 11: // Q3_K
|
||||
@ -247,7 +262,7 @@ func (t Tensor) typeSize() uint64 {
|
||||
case 14: // Q6_K
|
||||
return blockSize/2 + blockSize/4 + blockSize/16 + 2
|
||||
case 15: // Q8_K
|
||||
return 2 + blockSize + 2*blockSize/16
|
||||
return 4 + blockSize + 2*blockSize/16
|
||||
case 16: // IQ2_XXS
|
||||
return 2 + 2*blockSize/8
|
||||
case 17: // IQ2_XS
|
||||
@ -276,6 +291,8 @@ func (t Tensor) typeSize() uint64 {
|
||||
return 8
|
||||
case 29: // IQ1_M
|
||||
return blockSize/8 + blockSize/16 + blockSize/32
|
||||
case 30: // BF16
|
||||
return 2
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
|
@ -3,6 +3,7 @@ package ggml
|
||||
import (
|
||||
"maps"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
@ -157,3 +158,55 @@ func TestTensorLayers(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// ref: https://github.com/ggml-org/llama.cpp/blob/a82c9e7c23ef6db48cebfa194dc9cebbc4ac3552/ggml/src/ggml.c#L572
|
||||
func TestTensorTypes(t *testing.T) {
|
||||
cases := []struct {
|
||||
kind uint32
|
||||
blockSize uint64
|
||||
typeSize uint64
|
||||
}{
|
||||
{0, 1, 4},
|
||||
{1, 1, 2},
|
||||
{2, 32, 18},
|
||||
{3, 32, 20},
|
||||
{6, 32, 22},
|
||||
{7, 32, 24},
|
||||
{8, 32, 34},
|
||||
{9, 32, 36},
|
||||
{10, 256, 84},
|
||||
{11, 256, 110},
|
||||
{12, 256, 144},
|
||||
{13, 256, 176},
|
||||
{14, 256, 210},
|
||||
{15, 256, 292},
|
||||
{16, 256, 66},
|
||||
{17, 256, 74},
|
||||
{18, 256, 98},
|
||||
{19, 256, 50},
|
||||
{20, 32, 18},
|
||||
{21, 256, 110},
|
||||
{22, 256, 82},
|
||||
{23, 256, 136},
|
||||
{24, 1, 1},
|
||||
{25, 1, 2},
|
||||
{26, 1, 4},
|
||||
{27, 1, 8},
|
||||
{28, 1, 8},
|
||||
{29, 256, 56},
|
||||
{30, 1, 2},
|
||||
}
|
||||
|
||||
for _, tt := range cases {
|
||||
t.Run(strconv.Itoa(int(tt.kind)), func(t *testing.T) {
|
||||
tensor := Tensor{Kind: tt.kind}
|
||||
if tensor.blockSize() != tt.blockSize {
|
||||
t.Errorf("unexpected block size: got=%d want=%d", tensor.blockSize(), tt.blockSize)
|
||||
}
|
||||
|
||||
if tensor.typeSize() != tt.typeSize {
|
||||
t.Errorf("unexpected type size: got=%d want=%d", tensor.typeSize(), tt.typeSize)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user