
Merge pull request #4329 from dhiltgen/zero_layers

Fall back to CPU runner with zero layers
Daniel Hiltgen 11 months ago
Parent
Current commit
879e2caf8c
1 file changed, 4 insertions(+), 0 deletions(-)

+ 4 - 0
llm/server.go

@@ -105,6 +105,10 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 			// disable partial offloading when model is greater than total system memory as this
 			// can lead to locking up the system
 			opts.NumGPU = 0
+		} else if gpus[0].Library != "metal" && layers == 0 {
+			// Don't bother loading into the GPU if no layers can fit
+			cpuRunner = serverForCpu()
+			gpuCount = 0
 		} else if opts.NumGPU < 0 && layers > 0 && gpus[0].Library != "cpu" {
 			opts.NumGPU = layers
 		}
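
For readers skimming the diff, here is a minimal Go sketch of the decision this hunk adds. The pickRunner helper and the pared-down GpuInfo struct are hypothetical stand-ins for the real gpu.GpuInfoList, serverForCpu(), and memory-estimation code inside NewLlamaServer; the point is only the new branch: when the backend is not Metal and the estimator says zero layers fit, skip loading into the GPU and fall back to the CPU runner.

package main

import "fmt"

// GpuInfo is a simplified stand-in for the gpu.GpuInfo type; only the
// Library field matters for this sketch ("cuda", "rocm", "metal", "cpu").
type GpuInfo struct {
	Library string
}

// pickRunner is a hypothetical condensation of the runner-selection logic.
// It returns which runner to use and how many GPUs to hand to it.
func pickRunner(gpus []GpuInfo, layers int) (runner string, gpuCount int) {
	if len(gpus) == 0 || gpus[0].Library == "cpu" {
		// No usable GPU at all: CPU runner.
		return "cpu", 0
	}
	if gpus[0].Library != "metal" && layers == 0 {
		// The branch added in this commit: no layers can be offloaded,
		// so don't bother loading into the GPU.
		return "cpu", 0
	}
	// Otherwise keep the GPU runner (offload count is decided elsewhere).
	return gpus[0].Library, len(gpus)
}

func main() {
	fmt.Println(pickRunner([]GpuInfo{{Library: "cuda"}}, 0))  // cpu 0
	fmt.Println(pickRunner([]GpuInfo{{Library: "cuda"}}, 33)) // cuda 1
	fmt.Println(pickRunner([]GpuInfo{{Library: "metal"}}, 0)) // metal 1
}

Metal is excluded from the zero-layer fallback because Apple GPUs share system memory, so loading there does not carry the same risk as a discrete GPU with no room for any layers.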