浏览代码

use 10% vram overhead for cuda

Jeffrey Morgan 1 年之前
父节点
当前提交
cb534e6ac2
共有 2 个文件被更改,包括 6 次插入、4 次删除
  1. 5 4
      gpu/gpu.go
  2. 1 0
      llm/llm.go

+ 5 - 4
gpu/gpu.go

@@ -131,10 +131,11 @@ func getCPUMem() (memInfo, error) {
 func CheckVRAM() (int64, error) {
 func CheckVRAM() (int64, error) {
 	gpuInfo := GetGPUInfo()
 	gpuInfo := GetGPUInfo()
 	if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
 	if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
-		// allocate 384MiB for llama.cpp overhead (outside of model)
-		overhead := uint64(384 * 1024 * 1024)
-		if gpuInfo.FreeMemory <= overhead {
-			return 0, nil
+		// leave 10% or 400MiB of VRAM free for overhead
+		overhead := gpuInfo.FreeMemory / 10
+		minOverhead := 400 * 1024 * 1024
+		if overhead < minOverhead {
+			overhead = minOverhead
 		}
 		}
 
 
 		return int64(gpuInfo.FreeMemory - overhead), nil
 		return int64(gpuInfo.FreeMemory - overhead), nil

+ 1 - 0
llm/llm.go

@@ -117,6 +117,7 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
 			bytesPerLayer := int64((requiredModel + requiredKv) / int64(ggml.NumLayers()))
 			bytesPerLayer := int64((requiredModel + requiredKv) / int64(ggml.NumLayers()))
 			log.Println("bytes per layer:", bytesPerLayer)
 			log.Println("bytes per layer:", bytesPerLayer)
 			layers := available / bytesPerLayer
 			layers := available / bytesPerLayer
+			log.Println("total required with split:", requiredAlloc+(layers*bytesPerLayer))
 			if layers < int64(opts.NumGPU) {
 			if layers < int64(opts.NumGPU) {
 				opts.NumGPU = int(layers)
 				opts.NumGPU = int(layers)
 			}
 			}