Ver código fonte

Skip scheduling cancelled requests, always reload unloaded runners (#4189)

Jeffrey Morgan 1 ano atrás
pai
commit
c9f98622b1
1 arquivo alterado com 10 adições e 0 exclusões
  1. 10 0
      server/sched.go

+ 10 - 0
server/sched.go

@@ -100,6 +100,12 @@ func (s *Scheduler) processPending(ctx context.Context) {
 			return
 			return
 		case pending := <-s.pendingReqCh:
 		case pending := <-s.pendingReqCh:
 			// Block other requests until we get this pending request running
 			// Block other requests until we get this pending request running
+
+			if pending.ctx.Err() != nil {
+				slog.Debug("pending request cancelled or timed out, skipping scheduling")
+				continue
+			}
+
 			for {
 			for {
 				var runnerToExpire *runnerRef
 				var runnerToExpire *runnerRef
 				s.loadedMu.Lock()
 				s.loadedMu.Lock()
@@ -435,6 +441,10 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
 		timeout = 2 * time.Minute // Initial load can take a long time for big models on slow systems...
 		timeout = 2 * time.Minute // Initial load can take a long time for big models on slow systems...
 	}
 	}
 
 
+	if runner.Options == nil {
+		return true
+	}
+
 	// Don't reload runner if num_gpu=-1 was provided
 	// Don't reload runner if num_gpu=-1 was provided
 	optsExisting := runner.Options.Runner
 	optsExisting := runner.Options.Runner
 	optsNew := req.opts.Runner
 	optsNew := req.opts.Runner