Sfoglia il codice sorgente

Merge pull request #6402 from rick-github/numParallel

Override numParallel in pickBestPartialFitByLibrary() only if unset.
Daniel Hiltgen 8 mesi fa
parent
commit
88e7705079
1 file modificato con 4 aggiunte e 1 eliminazione
  1. 4 1
      server/sched.go

+ 4 - 1
server/sched.go

@@ -734,7 +734,10 @@ func pickBestFullFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoL
 
 // If multiple Libraries are detected, pick the Library which loads the most layers for the model
 func pickBestPartialFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList, numParallel *int) gpu.GpuInfoList {
-	*numParallel = 1
+	if *numParallel <= 0 {
+		*numParallel = 1
+		req.opts.NumCtx = req.origNumCtx
+	}
 	byLibrary := gpus.ByLibrary()
 	if len(byLibrary) <= 1 {
 		return gpus