Browse Source

Merge pull request #4031 from MarkWard0110/fix/issue-3736

Fix/issue 3736: When runners are closing or expiring, the scheduler gets dirty VRAM size readings.
Daniel Hiltgen 1 year ago
parent
commit
4fd064bea6
3 changed files with 18 additions and 8 deletions
  1. 2 1
      .gitignore
  2. 7 7
      llm/server.go
  3. 9 0
      server/sched.go

+ 2 - 1
.gitignore

@@ -11,4 +11,5 @@ ggml-metal.metal
 .idea
 test_data
 *.crt
-llm/build
+llm/build
+__debug_bin*

+ 7 - 7
llm/server.go

@@ -300,12 +300,6 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 			continue
 		}
 
-		// reap subprocess when it exits
-		go func() {
-			// Exit status managed via getServerStatus
-			_ = s.cmd.Wait()
-		}()
-
 		// TODO - make sure this is all wired up correctly
 		// if err = s.WaitUntilRunning(); err != nil {
 		// 	slog.Error("error starting llama server", "server", servers[i], "error", err)
@@ -899,7 +893,13 @@ func (s *llmServer) Detokenize(ctx context.Context, tokens []int) (string, error
 func (s *llmServer) Close() error {
 	if s.cmd != nil {
 		slog.Debug("stopping llama server")
-		return s.cmd.Process.Kill()
+		if err := s.cmd.Process.Kill(); err != nil {
+			return err
+		}
+
+		_ = s.cmd.Wait()
+
+		slog.Debug("llama server stopped")
 	}
 
 	return nil

+ 9 - 0
server/sched.go

@@ -250,6 +250,7 @@ func (s *Scheduler) processCompleted(ctx context.Context) {
 						defer runner.refMu.Unlock()
 						if runner.expireTimer != nil {
 							runner.expireTimer.Stop()
+							runner.expireTimer = nil
 						}
 						s.expiredCh <- runner
 					})
@@ -296,6 +297,10 @@ func (pending *LlmRequest) useLoadedRunner(runner *runnerRef, finished chan *Llm
 	runner.refMu.Lock()
 	defer runner.refMu.Unlock()
 	runner.refCount++
+	if runner.expireTimer != nil {
+		runner.expireTimer.Stop()
+		runner.expireTimer = nil
+	}
 	runner.sessionDuration = pending.sessionDuration
 	pending.successCh <- runner
 	go func() {
@@ -426,6 +431,10 @@ type runnerRef struct {
 
 // The refMu must already be held when calling unload
 func (runner *runnerRef) unload() {
+	if runner.expireTimer != nil {
+		runner.expireTimer.Stop()
+		runner.expireTimer = nil
+	}
 	if runner.llama != nil {
 		runner.llama.Close()
 	}