sched: Lift parallel restriction for multimodal models except mllama
The Go runner has no problem supporting parallel requests for most multimodal models, and now that we will no longer potentially fall back to server.cpp, this restriction can be lifted. However, the new mllama model can't support parallel requests, so we still need a restriction for it.
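To make the new behavior concrete, here is a minimal, self-contained sketch of the check the diff below relies on. The Model struct, its field names, and the example model paths are illustrative assumptions rather than ollama's actual types; only the checkMllamaModelFamily name and the clamp-to-1 logic come from the change itself.

package main

import (
	"fmt"
	"log/slog"
)

// Model is a stand-in for the scheduler's model record; the real
// ollama struct carries more fields. ProjectorPaths and
// ModelFamilies here are illustrative assumptions.
type Model struct {
	ProjectorPaths []string // non-empty for multimodal models
	ModelFamilies  []string // architecture families, e.g. "llama", "mllama"
}

// checkMllamaModelFamily reports whether the model declares the
// "mllama" family. This mirrors the helper named in the diff.
func checkMllamaModelFamily(m *Model) bool {
	for _, family := range m.ModelFamilies {
		if family == "mllama" {
			return true
		}
	}
	return false
}

// clampParallel applies the lifted restriction: only mllama is
// forced down to a single parallel request.
func clampParallel(m *Model, numParallel int) int {
	if checkMllamaModelFamily(m) && numParallel != 1 {
		slog.Warn("mllama doesn't support parallel requests yet")
		return 1
	}
	return numParallel
}

func main() {
	llava := &Model{ProjectorPaths: []string{"/models/llava/projector"}, ModelFamilies: []string{"llama", "clip"}}
	mllama := &Model{ModelFamilies: []string{"mllama"}}

	fmt.Println(clampParallel(llava, 4))  // 4: multimodal, but no longer clamped
	fmt.Println(clampParallel(mllama, 4)) // 1: mllama keeps the restriction
}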
@@ -130,11 +130,11 @@ func (s *Scheduler) processPending(ctx context.Context) {
 				continue
 			}
 			numParallel := int(envconfig.NumParallel())
-			// TODO (jmorganca): multimodal models don't support parallel yet
+			// TODO (jmorganca): mllama doesn't support parallel yet
 			// see https://github.com/ollama/ollama/issues/4165
-			if len(pending.model.ProjectorPaths) > 0 && numParallel != 1 {
+			if checkMllamaModelFamily(pending.model) && numParallel != 1 {
 				numParallel = 1
-				slog.Warn("multimodal models don't support parallel requests yet")
+				slog.Warn("mllama doesn't support parallel requests yet")
 			}

 			for {
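The notable design shift in this hunk: the old guard, len(pending.model.ProjectorPaths) > 0, flagged every model with a vision projector, so all multimodal models were serialized to a single request; the new guard keys off the model family, so only mllama pays the single-request penalty while other multimodal models run with the configured parallelism.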