|
@@ -567,9 +567,9 @@ func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) gpu.
|
|
|
// - try subsets of GPUs instead of just falling back to 1 or all in a family
|
|
|
|
|
|
// Now try all the GPUs
|
|
|
- if ok, estimatedVRAM = llm.PredictServerFit(gl, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
|
|
|
- slog.Debug("new model will fit in available VRAM, loading", "model", req.model.ModelPath, "library", gl[0].Library, "required", format.HumanBytes2(estimatedVRAM))
|
|
|
- return gl
|
|
|
+ if ok, estimatedVRAM = llm.PredictServerFit(sgl, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
|
|
|
+ slog.Debug("new model will fit in available VRAM, loading", "model", req.model.ModelPath, "library", sgl[0].Library, "required", format.HumanBytes2(estimatedVRAM))
|
|
|
+ return sgl
|
|
|
}
|
|
|
}
|
|
|
return nil
|