mirror of
https://github.com/ollama/ollama.git
synced 2025-03-20 14:52:59 +01:00
feat: add new Ollama engine using ggml through cgo. This change introduces a new way to run pretrained models. It introduces 3 high level interfaces and a bunch of smaller helper interfaces to facilitate this. - `model.Model` defines the interface for a model architecture. Models such as `llama` and `mllama`, which are provided as examples, can implement the model's forward propagation in the `Forward` method. This method will be called to generate completions. This interface can be found in `model/model.go`. - `ml.Backend` defines the interface for a backend tensor library, in this case `ggml`. Among other things, a Backend is responsible for loading a pretrained model into hardware (GPU, CPU, etc.) and providing an interface for Models to access loaded tensors. This interface can be found in `ml/backend.go`. - `ml.Tensor` defines the interface for a tensor and tensor operations. This is the first implementation of the new engine. Follow-up PRs will implement more features: - non-greedy sampling (#8410) - integration with Ollama and KV caching (#8301) - more model support (#9080), with more coming soon. Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
75 lines
1.2 KiB
Go
75 lines
1.2 KiB
Go
package sample
|
|
|
|
import (
|
|
"slices"
|
|
|
|
"gonum.org/v1/gonum/floats"
|
|
"gonum.org/v1/gonum/stat/sampleuv"
|
|
)
|
|
|
|
// Sampler is one stage of a sampling pipeline. The package-level Sample
// function calls each Sampler in turn, feeding the slice returned by one
// stage into the next.
type Sampler interface {
	// Sample receives a slice of float64 values and returns a slice of
	// float64 values, or an error if the stage cannot produce a result.
	Sample(values []float64) ([]float64, error)
}
|
|
|
|
type Temperature float64
|
|
|
|
func (s Temperature) Sample(t []float64) ([]float64, error) {
|
|
floats.Div(t, slices.Repeat([]float64{float64(s)}, len(t)))
|
|
return t, nil
|
|
}
|
|
|
|
type softmax struct{}
|
|
|
|
func Softmax() Sampler {
|
|
return softmax{}
|
|
}
|
|
|
|
func (softmax) Sample(t []float64) ([]float64, error) {
|
|
return t, nil
|
|
}
|
|
|
|
// TopK is a Sampler parameterised by an integer k.
type TopK int

// Sample currently returns t unchanged with a nil error; the value of the
// receiver is not consulted in this implementation.
func (TopK) Sample(t []float64) ([]float64, error) {
	return t, nil
}
|
|
|
|
// TopP is a Sampler parameterised by a float32 threshold p.
type TopP float32

// Sample currently returns t unchanged with a nil error; the value of the
// receiver is not consulted in this implementation.
func (TopP) Sample(t []float64) ([]float64, error) {
	return t, nil
}
|
|
|
|
// MinP is a Sampler parameterised by a float32 threshold p.
type MinP float32

// Sample currently returns t unchanged with a nil error; the value of the
// receiver is not consulted in this implementation.
func (MinP) Sample(t []float64) ([]float64, error) {
	return t, nil
}
|
|
|
|
type weighed struct{}
|
|
|
|
func Weighed() Sampler {
|
|
return weighed{}
|
|
}
|
|
|
|
func (s weighed) Sample(t []float64) ([]float64, error) {
|
|
w := sampleuv.NewWeighted(t, nil)
|
|
if v, ok := w.Take(); ok {
|
|
return []float64{float64(v)}, nil
|
|
}
|
|
|
|
return t, nil
|
|
}
|
|
|
|
func Sample(floats []float64, samplers ...Sampler) ([]float64, error) {
|
|
var err error
|
|
for _, sampler := range samplers {
|
|
floats, err = sampler.Sample(floats)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
return floats, nil
|
|
}
|