11 months ago · 4fcc84e67a
--- a/server/routes.go
+++ b/server/routes.go
@@ -1036,7 +1036,8 @@ func Serve(ln net.Listener) error {
 
				 	}
			
 
				 
			
 
				 	ctx, done := context.WithCancel(context.Background())
			
 
				-	sched := InitScheduler(ctx)
			
 
				+	schedCtx, schedDone := context.WithCancel(ctx)
			
 
				+	sched := InitScheduler(schedCtx)
			
 
				 	s := &Server{addr: ln.Addr(), sched: sched}
			
 
				 	r := s.GenerateRoutes()
			
 
				 
			
@@ -1051,24 +1052,31 @@ func Serve(ln net.Listener) error {
 
				 	go func() {
			
 
				 		<-signals
			
 
				 		srvr.Close()
			
 
				-		done()
			
 
				+		schedDone()
			
 
				 		sched.unloadAllRunners()
			
 
				 		gpu.Cleanup()
			
 
				-		os.Exit(0)
			
 
				+		done()
			
 
				 	}()
			
 
				 
			
 
				 	if err := llm.Init(); err != nil {
			
 
				 		return fmt.Errorf("unable to initialize llm library %w", err)
			
 
				 	}
			
 
				 
			
 
				-	s.sched.Run(ctx)
			
 
				+	s.sched.Run(schedCtx)
			
 
				 
			
 
				 	// At startup we retrieve GPU information so we can get log messages before loading a model
			
 
				 	// This will log warnings to the log in case we have problems with detected GPUs
			
 
				 	gpus := gpu.GetGPUInfo()
			
 
				 	gpus.LogDetails()
			
 
				 
			
 
				-	return srvr.Serve(ln)
			
 
				+	err = srvr.Serve(ln)
			
 
				+	// If the server was closed by the signal handler, wait for ctx to be done so the handler's cleanup can finish,
			
 
				+	// otherwise return the error immediately
			
 
				+	if !errors.Is(err, http.ErrServerClosed) {
			
 
				+		return err
			
 
				+	}
			
 
				+	<-ctx.Done()
			
 
				+	return err
			
 
				 }
			
 
				 
			
 
				 func waitForStream(c *gin.Context, ch chan interface{}) {