We're seeing OOMs under stress scenarios, so this raises the minimum memory constants for CUDA, ROCm, and Metal to help stabilize allocations under heavy concurrency.
@@ -31,8 +31,8 @@ type handles struct {
}
const (
- cudaMinimumMemory = 256 * format.MebiByte
- rocmMinimumMemory = 256 * format.MebiByte
+ cudaMinimumMemory = 457 * format.MebiByte
+ rocmMinimumMemory = 457 * format.MebiByte
)
var gpuMutex sync.Mutex
@@ -15,7 +15,7 @@ import (
- metalMinimumMemory = 384 * format.MebiByte
+ metalMinimumMemory = 512 * format.MebiByte
func GetGPUInfo() GpuInfoList {