11 months ago · 92c81e8117
--- a/llm/server.go
+++ b/llm/server.go
@@ -519,11 +519,13 @@ func (s *llmServer) Ping(ctx context.Context) error {
 
				 
			
 
				 func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
			
 
				 	start := time.Now()
			
 
				-	stallDuration := 60 * time.Second
			
 
				-	stallTimer := time.Now().Add(stallDuration) // give up if we stall for
			
 
				+	stallDuration := 5 * time.Minute            // give up if load progress does not change for this long
			
 
				+	finalLoadDuration := 5 * time.Minute        // After we hit 100%, give the runner more time to come online
			
 
				+	stallTimer := time.Now().Add(stallDuration) // give up if we stall
			
 
				 
			
 
				 	slog.Info("waiting for llama runner to start responding")
			
 
				 	var lastStatus ServerStatus = -1
			
 
				+	fullyLoaded := false
			
 
				 
			
 
				 	for {
			
 
				 		select {
			
@@ -572,6 +574,10 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
 
				 			if priorProgress != s.loadProgress {
			
 
				 				slog.Debug(fmt.Sprintf("model load progress %0.2f", s.loadProgress))
			
 
				 				stallTimer = time.Now().Add(stallDuration)
			
 
				+			} else if !fullyLoaded && int(s.loadProgress*100.0) >= 100 {
			
 
				+				slog.Debug("model load completed, waiting for server to become available", "status", status.ToString())
			
 
				+				stallTimer = time.Now().Add(finalLoadDuration)
			
 
				+				fullyLoaded = true
			
 
				 			}
			
 
				 			time.Sleep(time.Millisecond * 250)
			
 
				 			continue