Mirror of https://github.com/ollama/ollama.git — synced 2025-09-14 01:51:54 +02:00
sched: fix runner leak during reloading unload (#10819)
When the same model is being reloaded rapidly with client connections being canceled before the model finishes loading, the queued unload event could cause a leak of runners by deleting a different runner from the loaded list.
This commit is contained in:
@@ -387,6 +387,17 @@ func (s *Scheduler) processCompleted(ctx context.Context) {
			s.loadedMu.Unlock()
			runner.refMu.Unlock()
			slog.Debug("duplicate expired event, ignoring", "runner", runner)
		} else if runner.pid != runnerToUnload.pid {
			// If the pids do not match, we likely had multiple load
			// failures for the same model in quick succession due to
			// request context canceled and are draining the queue of
			// events. Ensure the orphaned runner is properly shut down, but
			// do not delete the mismatched loaded runner, or wait for VRAM
			// convergence.
			slog.Debug("orphaned runner shutting down", "orphan", runner, "loaded", runnerToUnload)
			runner.unload()
			s.loadedMu.Unlock()
			runner.refMu.Unlock()
		} else {
			slog.Debug("starting background wait for VRAM recovery", "runner", runner)
			finished := runner.waitForVRAMRecovery()
Reference in New Issue
Block a user