1 year ago · f0c454ab57
--- a/gpu/gpu_darwin.go
+++ b/gpu/gpu_darwin.go
@@ -10,6 +10,12 @@ package gpu
 
				 import "C"
			
 
				 import (
			
 
				 	"runtime"
			
 
				+
			
 
				+	"github.com/ollama/ollama/format"
			
 
				+)
			
 
				+
			
 
				+const (
			
 
				+	metalMinimumMemory = 512 * format.MebiByte
			
 
				 )
			
 
				 
			
 
				 func GetGPUInfo() GpuInfoList {
			
@@ -32,7 +38,7 @@ func GetGPUInfo() GpuInfoList {
 
				 	// TODO is there a way to gather actual allocated video memory? (currentAllocatedSize doesn't work)
			
 
				 	info.FreeMemory = info.TotalMemory
			
 
				 
			
 
				-	info.MinimumMemory = 0
			
 
				+	info.MinimumMemory = metalMinimumMemory
			
 
				 	return []GpuInfo{info}
			
 
				 }
			
 
				 
			
--- a/llm/memory.go
+++ b/llm/memory.go
@@ -88,6 +88,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
 
				 	graphFullOffload *= uint64(len(gpus))
			
 
				 	graphPartialOffload *= uint64(len(gpus))
			
 
				 
			
 
				+	// on metal there's no partial offload overhead
			
 
				+	if gpus[0].Library == "metal" {
			
 
				+		graphPartialOffload = graphFullOffload
			
 
				+	}
			
 
				+
			
 
				 	// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
			
 
				 	memoryRequiredTotal := memoryMinimum + graphFullOffload