mirror of
https://github.com/ollama/ollama.git
synced 2025-08-24 09:41:07 +02:00
runner: default to greedy sampler for performance (#9407)
As are adding support for weighted sampling we have seen some performance regressions, bypassing the sampler logic for now and defaulting to greedy until we can benchmark the new sampler logic.
This commit is contained in:
@@ -575,23 +575,11 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
sampler, err := sample.NewSampler(
|
||||
req.Temperature,
|
||||
req.TopK,
|
||||
req.TopP,
|
||||
req.MinP,
|
||||
req.Seed,
|
||||
)
|
||||
if err != nil {
|
||||
http.Error(w, fmt.Sprintf("Failed to create sampler: %v", err), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
seq, err := s.NewSequence(req.Prompt, req.Images, NewSequenceParams{
|
||||
numPredict: req.NumPredict,
|
||||
stop: req.Stop,
|
||||
numKeep: int32(req.NumKeep),
|
||||
sampler: sampler,
|
||||
sampler: sample.Greedy(), // TODO: add support for different samplers when performance is optimized
|
||||
embedding: false,
|
||||
})
|
||||
if err != nil {
|
||||
|
Reference in New Issue
Block a user