Browse Source

scale graph based on gpu count

Michael Yang, 1 năm trước
mục cha
commit
26df674785
2 tập tin đã thay đổi với 4 bổ sung và 1 xóa
  1. +1 -1
      gpu/gpu_darwin.go
  2. +3 -0
      llm/server.go

+ 1 - 1
gpu/gpu_darwin.go

@@ -55,6 +55,6 @@ func getCPUMem() (memInfo, error) {
 	return memInfo{
 	return memInfo{
 		TotalMemory: uint64(C.getPhysicalMemory()),
 		TotalMemory: uint64(C.getPhysicalMemory()),
 		FreeMemory:  0,
 		FreeMemory:  0,
-		DeviceCount: 0,
+		DeviceCount: 1,
 	}, nil
 	}, nil
 }
 }

+ 3 - 0
llm/server.go

@@ -79,6 +79,9 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 		graphFullOffload = graphPartialOffload
 		graphFullOffload = graphPartialOffload
 	}
 	}
 
 
+	graphFullOffload *= uint64(info.DeviceCount)
+	graphPartialOffload *= uint64(info.DeviceCount)
+
 	// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
 	// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
 	memoryRequiredTotal := memoryMinimum + graphFullOffload
 	memoryRequiredTotal := memoryMinimum + graphFullOffload