runner: use new grammar interface

This commit is contained in:
ParthSareen 2025-04-02 14:36:27 -07:00
parent 486a694852
commit 2d64c195a2

View File

@ -298,12 +298,6 @@ type Server struct {
// multimodalHash generates hashes for comparing equality
// of non-text data
multimodalHash maphash.Hash
// vocab is a llama.cpp vocab required for grammar-based
// constrained generation (json mode, structured outputs)
// TODO: this is temporary until Ollama sampling supports
// constrained generation
vocab *sample.Vocab
}
func (s *Server) allNil() bool {
@ -609,7 +603,7 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) {
var grammar *sample.Grammar
var err error
if req.Grammar != "" {
grammar, err = sample.NewGrammar(s.vocab, req.Grammar)
grammar, err = sample.NewGrammar(s.model.(model.TextProcessor).Vocabulary(), req.Grammar)
if err != nil {
http.Error(w, "failed to load model vocabulary required for format", http.StatusInternalServerError)
return
@ -749,8 +743,6 @@ func (s *Server) loadModel(
panic(err)
}
s.vocab = sample.NewVocab(mpath)
// TODO(jessegross): LoRA loading
if lpath.String() != "" {
panic("loras are not yet implemented")