浏览代码

Merge pull request #4682 from dhiltgen/more_time

Give the final model loading more time
Daniel Hiltgen 11 月之前
父节点
当前提交
d7339fad52
共有 1 个文件被更改,包括 8 次插入2 次删除
  1. 8 2
      llm/server.go

+ 8 - 2
llm/server.go

@@ -519,11 +519,13 @@ func (s *llmServer) Ping(ctx context.Context) error {
 
 func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
 	start := time.Now()
-	stallDuration := 60 * time.Second
-	stallTimer := time.Now().Add(stallDuration) // give up if we stall for
+	stallDuration := 5 * time.Minute            // If no progress happens
+	finalLoadDuration := 5 * time.Minute        // After we hit 100%, give the runner more time to come online
+	stallTimer := time.Now().Add(stallDuration) // give up if we stall
 
 	slog.Info("waiting for llama runner to start responding")
 	var lastStatus ServerStatus = -1
+	fullyLoaded := false
 
 	for {
 		select {
@@ -572,6 +574,10 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
 			if priorProgress != s.loadProgress {
 				slog.Debug(fmt.Sprintf("model load progress %0.2f", s.loadProgress))
 				stallTimer = time.Now().Add(stallDuration)
+			} else if !fullyLoaded && int(s.loadProgress*100.0) >= 100 {
+				slog.Debug("model load completed, waiting for server to become available", "status", status.ToString())
+				stallTimer = time.Now().Add(finalLoadDuration)
+				fullyLoaded = true
 			}
 			time.Sleep(time.Millisecond * 250)
 			continue