Explorar o código

Merge pull request #6402 from rick-github/numParallel

Override numParallel in pickBestPartialFitByLibrary() only if unset.
Daniel Hiltgen hai 8 meses
pai
achega
88e7705079
Modificouse 1 ficheiro con 4 adicións e 1 borrado
  1. 4 1
      server/sched.go

+ 4 - 1
server/sched.go

@@ -734,7 +734,10 @@ func pickBestFullFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoL
 
 // If multiple Libraries are detected, pick the Library which loads the most layers for the model
 func pickBestPartialFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList, numParallel *int) gpu.GpuInfoList {
-	*numParallel = 1
+	if *numParallel <= 0 {
+		*numParallel = 1
+		req.opts.NumCtx = req.origNumCtx
+	}
 	byLibrary := gpus.ByLibrary()
 	if len(byLibrary) <= 1 {
 		return gpus