mirror of
https://github.com/ollama/ollama.git
synced 2025-08-26 22:41:47 +02:00
ggml: Use GGML's typedef'ed pointer types
For many backend data structures, GGML defines a typedef of a pointer type and returns these from functions. In most cases, CGo understands that these are interchangeable, but some parts of Go (such as generics) treat them as two different types. We should prefer the form that GGML uses.
This commit is contained in:
@@ -33,9 +33,9 @@ import (
|
|||||||
"golang.org/x/sync/errgroup"
|
"golang.org/x/sync/errgroup"
|
||||||
)
|
)
|
||||||
|
|
||||||
func devices() []*C.struct_ggml_backend_device {
|
func devices() []C.ggml_backend_dev_t {
|
||||||
ggml.OnceLoad()
|
ggml.OnceLoad()
|
||||||
ds := make([]*C.struct_ggml_backend_device, C.ggml_backend_dev_count())
|
ds := make([]C.ggml_backend_dev_t, C.ggml_backend_dev_count())
|
||||||
for i := range ds {
|
for i := range ds {
|
||||||
ds[i] = C.ggml_backend_dev_get(C.size_t(i))
|
ds[i] = C.ggml_backend_dev_get(C.size_t(i))
|
||||||
}
|
}
|
||||||
@@ -53,23 +53,23 @@ type Backend struct {
|
|||||||
// to the name that is used by the model definition
|
// to the name that is used by the model definition
|
||||||
tensorLoadTargets map[string][]string
|
tensorLoadTargets map[string][]string
|
||||||
|
|
||||||
sched *C.struct_ggml_backend_sched
|
sched C.ggml_backend_sched_t
|
||||||
schedBackends []*C.struct_ggml_backend
|
schedBackends []C.ggml_backend_t
|
||||||
schedBufts []*C.struct_ggml_backend_buffer_type
|
schedBufts []C.ggml_backend_buffer_type_t
|
||||||
|
|
||||||
tensors map[string]*C.struct_ggml_tensor
|
tensors map[string]*C.struct_ggml_tensor
|
||||||
|
|
||||||
// input is the backend used for inputs
|
// input is the backend used for inputs
|
||||||
input *C.struct_ggml_backend_buffer_type
|
input C.ggml_backend_buffer_type_t
|
||||||
|
|
||||||
// layers is the backend used for repeating layers
|
// layers is the backend used for repeating layers
|
||||||
layers map[int]*C.struct_ggml_backend_buffer_type
|
layers map[int]C.ggml_backend_buffer_type_t
|
||||||
|
|
||||||
// requiredMemory is the cumulative memory allocations needed by the backend
|
// requiredMemory is the cumulative memory allocations needed by the backend
|
||||||
requiredMemory *ml.BackendMemory
|
requiredMemory *ml.BackendMemory
|
||||||
|
|
||||||
// btDeviceMemory maps from a buffer type to the memory allocations associated with that device
|
// btDeviceMemory maps from a buffer type to the memory allocations associated with that device
|
||||||
btDeviceMemory map[*C.struct_ggml_backend_buffer_type]*ml.DeviceMemory
|
btDeviceMemory map[C.ggml_backend_buffer_type_t]*ml.DeviceMemory
|
||||||
|
|
||||||
flashAttention bool
|
flashAttention bool
|
||||||
|
|
||||||
@@ -100,14 +100,14 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
|
|||||||
)
|
)
|
||||||
|
|
||||||
var requiredMemory ml.BackendMemory
|
var requiredMemory ml.BackendMemory
|
||||||
btDeviceMemory := make(map[*C.struct_ggml_backend_buffer_type]*ml.DeviceMemory)
|
btDeviceMemory := make(map[C.ggml_backend_buffer_type_t]*ml.DeviceMemory)
|
||||||
|
|
||||||
type deviceBufferType struct {
|
type deviceBufferType struct {
|
||||||
d *C.struct_ggml_backend_device
|
d C.ggml_backend_dev_t
|
||||||
bts []*C.struct_ggml_backend_buffer_type
|
bts []C.ggml_backend_buffer_type_t
|
||||||
}
|
}
|
||||||
|
|
||||||
var cpus, accels, gpus []*C.struct_ggml_backend_device
|
var cpus, accels, gpus []C.ggml_backend_dev_t
|
||||||
for _, d := range devices() {
|
for _, d := range devices() {
|
||||||
switch C.ggml_backend_dev_type(d) {
|
switch C.ggml_backend_dev_type(d) {
|
||||||
case C.GGML_BACKEND_DEVICE_TYPE_CPU:
|
case C.GGML_BACKEND_DEVICE_TYPE_CPU:
|
||||||
@@ -149,7 +149,7 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
|
|||||||
bt := C.ggml_backend_dev_buffer_type(d)
|
bt := C.ggml_backend_dev_buffer_type(d)
|
||||||
gpuDeviceBufferTypes = append(gpuDeviceBufferTypes, deviceBufferType{
|
gpuDeviceBufferTypes = append(gpuDeviceBufferTypes, deviceBufferType{
|
||||||
d: d,
|
d: d,
|
||||||
bts: append([]*C.struct_ggml_backend_buffer_type{bt}, cpuDeviceBufferType.bts...),
|
bts: append([]C.ggml_backend_buffer_type_t{bt}, cpuDeviceBufferType.bts...),
|
||||||
})
|
})
|
||||||
btDeviceMemory[bt] = &requiredMemory.GPUs[i]
|
btDeviceMemory[bt] = &requiredMemory.GPUs[i]
|
||||||
requiredMemory.GPUs[i].Name = C.GoString(C.ggml_backend_dev_name(d))
|
requiredMemory.GPUs[i].Name = C.GoString(C.ggml_backend_dev_name(d))
|
||||||
@@ -235,8 +235,8 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
|
|||||||
targets := make(map[string][]string)
|
targets := make(map[string][]string)
|
||||||
|
|
||||||
// contexts are shared by tensors of the same buffer type
|
// contexts are shared by tensors of the same buffer type
|
||||||
ctxs := make(map[*C.struct_ggml_backend_buffer_type]*C.struct_ggml_context)
|
ctxs := make(map[C.ggml_backend_buffer_type_t]*C.struct_ggml_context)
|
||||||
createTensor := func(t tensor, bts []*C.struct_ggml_backend_buffer_type, layer int) *C.struct_ggml_tensor {
|
createTensor := func(t tensor, bts []C.ggml_backend_buffer_type_t, layer int) *C.struct_ggml_tensor {
|
||||||
for _, bt := range bts {
|
for _, bt := range bts {
|
||||||
if _, ok := ctxs[bt]; !ok {
|
if _, ok := ctxs[bt]; !ok {
|
||||||
ctxs[bt] = C.ggml_init(C.struct_ggml_init_params{
|
ctxs[bt] = C.ggml_init(C.struct_ggml_init_params{
|
||||||
@@ -330,7 +330,7 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// allocate buffers for each context
|
// allocate buffers for each context
|
||||||
bbs := make(map[*C.struct_ggml_context]*C.struct_ggml_backend_buffer, len(ctxs))
|
bbs := make(map[*C.struct_ggml_context]C.ggml_backend_buffer_t, len(ctxs))
|
||||||
for bt, c := range ctxs {
|
for bt, c := range ctxs {
|
||||||
if C.ggml_get_first_tensor(c) == nil {
|
if C.ggml_get_first_tensor(c) == nil {
|
||||||
continue
|
continue
|
||||||
@@ -388,11 +388,11 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// map devices to backend buffer types so new tensors can be assigned to the correct device
|
// map devices to backend buffer types so new tensors can be assigned to the correct device
|
||||||
deviceBufferTypes := make(map[*C.struct_ggml_backend_device]*C.struct_ggml_backend_buffer_type)
|
deviceBufferTypes := make(map[C.ggml_backend_dev_t]C.ggml_backend_buffer_type_t)
|
||||||
|
|
||||||
// create backends and buffer types used for the compute graph scheduler
|
// create backends and buffer types used for the compute graph scheduler
|
||||||
var schedBackends []*C.struct_ggml_backend
|
var schedBackends []C.ggml_backend_t
|
||||||
var schedBufts []*C.struct_ggml_backend_buffer_type
|
var schedBufts []C.ggml_backend_buffer_type_t
|
||||||
for _, d := range append(gpus, append(accels, cpus...)...) {
|
for _, d := range append(gpus, append(accels, cpus...)...) {
|
||||||
b := C.ggml_backend_dev_init(d, nil)
|
b := C.ggml_backend_dev_init(d, nil)
|
||||||
bt := C.ggml_backend_get_default_buffer_type(b)
|
bt := C.ggml_backend_get_default_buffer_type(b)
|
||||||
@@ -426,8 +426,8 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
|
|||||||
schedBackends: schedBackends,
|
schedBackends: schedBackends,
|
||||||
schedBufts: schedBufts,
|
schedBufts: schedBufts,
|
||||||
input: deviceBufferTypes[input.d],
|
input: deviceBufferTypes[input.d],
|
||||||
layers: func() map[int]*C.struct_ggml_backend_buffer_type {
|
layers: func() map[int]C.ggml_backend_buffer_type_t {
|
||||||
m := make(map[int]*C.struct_ggml_backend_buffer_type)
|
m := make(map[int]C.ggml_backend_buffer_type_t)
|
||||||
for i, layer := range layers {
|
for i, layer := range layers {
|
||||||
m[i] = deviceBufferTypes[layer.d]
|
m[i] = deviceBufferTypes[layer.d]
|
||||||
}
|
}
|
||||||
@@ -539,7 +539,7 @@ func (b *Backend) NewContextSize(n int) ml.Context {
|
|||||||
panic(fmt.Errorf("requested number of graph nodes (%v) for new context exceeds maximum (%v)", n, b.maxGraphNodes))
|
panic(fmt.Errorf("requested number of graph nodes (%v) for new context exceeds maximum (%v)", n, b.maxGraphNodes))
|
||||||
}
|
}
|
||||||
|
|
||||||
var allocatedBuffers []*C.struct_ggml_backend_buffer
|
var allocatedBuffers []C.ggml_backend_buffer_t
|
||||||
|
|
||||||
return &Context{
|
return &Context{
|
||||||
b: b,
|
b: b,
|
||||||
@@ -568,11 +568,11 @@ type Context struct {
|
|||||||
graph *C.struct_ggml_cgraph
|
graph *C.struct_ggml_cgraph
|
||||||
|
|
||||||
// buft is the buffer type used for new tensors
|
// buft is the buffer type used for new tensors
|
||||||
buft *C.struct_ggml_backend_buffer_type
|
buft C.ggml_backend_buffer_type_t
|
||||||
|
|
||||||
// allocatedBuffers are buffers for tensors that we have allocated in this context
|
// allocatedBuffers are buffers for tensors that we have allocated in this context
|
||||||
// so that we can free them when we close the context
|
// so that we can free them when we close the context
|
||||||
allocatedBuffers *[]*C.struct_ggml_backend_buffer
|
allocatedBuffers *[]C.ggml_backend_buffer_t
|
||||||
|
|
||||||
// maxGraphNodes is the maximum allowed number of graph nodes in this context
|
// maxGraphNodes is the maximum allowed number of graph nodes in this context
|
||||||
maxGraphNodes int
|
maxGraphNodes int
|
||||||
|
Reference in New Issue
Block a user