瀏覽代碼

Remove VRAM convergence check for Windows

The APIs we query are optimistic about free space, and Windows pages
VRAM, so we don't have to wait for reported usage to recover on unload.
Daniel Hiltgen 11 月之前
父節點
當前提交
ec231a7923
共有 1 個文件被更改,包括 3 次插入和 2 次刪除
  1. 3 2
      server/sched.go

+ 3 - 2
server/sched.go

@@ -6,6 +6,7 @@ import (
 	"fmt"
 	"fmt"
 	"log/slog"
 	"log/slog"
 	"reflect"
 	"reflect"
+	"runtime"
 	"sort"
 	"sort"
 	"strings"
 	"strings"
 	"sync"
 	"sync"
@@ -487,8 +488,8 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
 func (runner *runnerRef) waitForVRAMRecovery() chan interface{} {
 func (runner *runnerRef) waitForVRAMRecovery() chan interface{} {
 	finished := make(chan interface{}, 1)
 	finished := make(chan interface{}, 1)
 
 
-	// CPU or Metal don't need checking, so no waiting required
-	if len(runner.gpus) == 1 && (runner.gpus[0].Library == "cpu" || runner.gpus[0].Library == "metal") {
+	// CPU or Metal don't need checking, so no waiting required, windows can page VRAM, and the APIs we query tend to be optimistic on free space
+	if (len(runner.gpus) == 1 && (runner.gpus[0].Library == "cpu" || runner.gpus[0].Library == "metal")) || runtime.GOOS == "windows" {
 		finished <- struct{}{}
 		finished <- struct{}{}
 		return finished
 		return finished
 	}
 	}