@@ -734,7 +734,9 @@ func pickBestFullFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoL
// If multiple Libraries are detected, pick the Library which loads the most layers for the model
func pickBestPartialFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList, numParallel *int) gpu.GpuInfoList {
- *numParallel = 1
+ if *numParallel <= 0 {
+ *numParallel = 1
+ }
byLibrary := gpus.ByLibrary()
if len(byLibrary) <= 1 {
return gpus