@@ -55,6 +55,6 @@ func getCPUMem() (memInfo, error) {
return memInfo{
TotalMemory: uint64(C.getPhysicalMemory()),
FreeMemory: 0,
- DeviceCount: 0,
+ DeviceCount: 1,
}, nil
}
@@ -79,6 +79,9 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
graphFullOffload = graphPartialOffload
+ graphFullOffload *= uint64(info.DeviceCount)
+ graphPartialOffload *= uint64(info.DeviceCount)
+
// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
memoryRequiredTotal := memoryMinimum + graphFullOffload