@@ -24,19 +24,37 @@ import (
 	"github.com/ollama/ollama/format"
 )
 
-type handles struct {
+type cudaHandles struct {
 	deviceCount int
 	cudart      *C.cudart_handle_t
 	nvcuda      *C.nvcuda_handle_t
+	nvml        *C.nvml_handle_t
+}
+
+type oneapiHandles struct {
 	oneapi      *C.oneapi_handle_t
+	deviceCount int
 }
 
 const (
 	cudaMinimumMemory = 457 * format.MebiByte
 	rocmMinimumMemory = 457 * format.MebiByte
+	// TODO OneAPI minimum memory
 )
 
-var gpuMutex sync.Mutex
+var (
+	gpuMutex      sync.Mutex
+	bootstrapped  bool
+	cpuCapability CPUCapability
+	cpus          []CPUInfo
+	cudaGPUs      []CudaGPUInfo
+	nvcudaLibPath string
+	cudartLibPath string
+	oneapiLibPath string
+	nvmlLibPath   string
+	rocmGPUs      []RocmGPUInfo
+	oneapiGPUs    []OneapiGPUInfo
+)
 
 // With our current CUDA compile flags, older than 5.0 will not work properly
 var CudaComputeMin = [2]C.int{5, 0}
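
The new package-level variables memoize discovery: once a management library loads successfully, its path is cached so later calls can skip the expensive glob search entirely. A minimal sketch of that pattern under stated assumptions (the package name, mu, libPath, and lookupLib below are illustrative, not part of this change):

package discover

import "sync"

var (
	mu      sync.Mutex
	libPath string // cached after the first successful discovery
)

// lookupLib runs the expensive search once and serves the cached path on
// every later call, mirroring how nvcudaLibPath, cudartLibPath,
// nvmlLibPath, and oneapiLibPath are used in this patch.
func lookupLib(find func() string) string {
	mu.Lock()
	defer mu.Unlock()
	if libPath == "" {
		libPath = find() // e.g. a glob scan over known install locations
	}
	return libPath
}
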
@@ -46,113 +64,113 @@ var RocmComputeMin = 9
 // TODO find a better way to detect iGPU instead of minimum memory
 const IGPUMemLimit = 1 * format.GibiByte // 512M is what they typically report, so anything less than 1G must be iGPU
 
-var CudartLinuxGlobs = []string{
-	"/usr/local/cuda/lib64/libcudart.so*",
-	"/usr/lib/x86_64-linux-gnu/nvidia/current/libcudart.so*",
-	"/usr/lib/x86_64-linux-gnu/libcudart.so*",
-	"/usr/lib/wsl/lib/libcudart.so*",
-	"/usr/lib/wsl/drivers/*/libcudart.so*",
-	"/opt/cuda/lib64/libcudart.so*",
-	"/usr/local/cuda*/targets/aarch64-linux/lib/libcudart.so*",
-	"/usr/lib/aarch64-linux-gnu/nvidia/current/libcudart.so*",
-	"/usr/lib/aarch64-linux-gnu/libcudart.so*",
-	"/usr/local/cuda/lib*/libcudart.so*",
-	"/usr/lib*/libcudart.so*",
-	"/usr/local/lib*/libcudart.so*",
-}
-
-var CudartWindowsGlobs = []string{
-	"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
-}
-
-var NvcudaLinuxGlobs = []string{
-	"/usr/local/cuda*/targets/*/lib/libcuda.so*",
-	"/usr/lib/*-linux-gnu/nvidia/current/libcuda.so*",
-	"/usr/lib/*-linux-gnu/libcuda.so*",
-	"/usr/lib/wsl/lib/libcuda.so*",
-	"/usr/lib/wsl/drivers/*/libcuda.so*",
-	"/opt/cuda/lib*/libcuda.so*",
-	"/usr/local/cuda/lib*/libcuda.so*",
-	"/usr/lib*/libcuda.so*",
-	"/usr/local/lib*/libcuda.so*",
-}
-
-var NvcudaWindowsGlobs = []string{
-	"c:\\windows\\system*\\nvcuda.dll",
-}
-
-var OneapiWindowsGlobs = []string{
-	"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
-}
-
-var OneapiLinuxGlobs = []string{
-	"/usr/lib/x86_64-linux-gnu/libze_intel_gpu.so*",
-	"/usr/lib*/libze_intel_gpu.so*",
-}
-
 // Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
 // Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
 var CudaTegra string = os.Getenv("JETSON_JETPACK")
 
 // Note: gpuMutex must already be held
-func initGPUHandles() *handles {
+func initCudaHandles() *cudaHandles {
 
 	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
 
-	gpuHandles := &handles{}
-	var cudartMgmtName string
+	cHandles := &cudaHandles{}
+	// Short-circuit if we already know which library to use
+	if nvmlLibPath != "" {
+		cHandles.nvml, _ = LoadNVMLMgmt([]string{nvmlLibPath})
+		return cHandles
+	}
+	if nvcudaLibPath != "" {
+		cHandles.deviceCount, cHandles.nvcuda, _ = LoadNVCUDAMgmt([]string{nvcudaLibPath})
+		return cHandles
+	}
+	if cudartLibPath != "" {
+		cHandles.deviceCount, cHandles.cudart, _ = LoadCUDARTMgmt([]string{cudartLibPath})
+		return cHandles
+	}
+
+	slog.Debug("searching for GPU discovery libraries for NVIDIA")
 	var cudartMgmtPatterns []string
-	var nvcudaMgmtName string
-	var nvcudaMgmtPatterns []string
 
-	tmpDir, _ := PayloadsDir()
-	switch runtime.GOOS {
-	case "windows":
-		cudartMgmtName = "cudart64_*.dll"
+	// Aligned with driver, we can't carry as payloads
+	nvcudaMgmtPatterns := NvcudaGlobs
+
+	if runtime.GOOS == "windows" {
 		localAppData := os.Getenv("LOCALAPPDATA")
-		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", cudartMgmtName)}
-		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartWindowsGlobs...)
-		// Aligned with driver, we can't carry as payloads
-		nvcudaMgmtName = "nvcuda.dll"
-		nvcudaMgmtPatterns = NvcudaWindowsGlobs
-	case "linux":
-		cudartMgmtName = "libcudart.so*"
-		if tmpDir != "" {
-			// TODO - add "payloads" for subprocess
-			cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", cudartMgmtName)}
+		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", CudartMgmtName)}
+	}
+	tmpDir, _ := PayloadsDir()
+	if tmpDir != "" {
+		// TODO - add "payloads" for subprocess
+		cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", CudartMgmtName)}
+	}
+	cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)
+
+	if len(NvmlGlobs) > 0 {
+		nvmlLibPaths := FindGPULibs(NvmlMgmtName, NvmlGlobs)
+		if len(nvmlLibPaths) > 0 {
+			nvml, libPath := LoadNVMLMgmt(nvmlLibPaths)
+			if nvml != nil {
+				slog.Debug("nvidia-ml loaded", "library", libPath)
+				cHandles.nvml = nvml
+				nvmlLibPath = libPath
+			}
 		}
-		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
-		// Aligned with driver, we can't carry as payloads
-		nvcudaMgmtName = "libcuda.so*"
-		nvcudaMgmtPatterns = NvcudaLinuxGlobs
-	default:
-		return gpuHandles
 	}
 
-	slog.Debug("Detecting GPUs")
-	nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
+	nvcudaLibPaths := FindGPULibs(NvcudaMgmtName, nvcudaMgmtPatterns)
 	if len(nvcudaLibPaths) > 0 {
 		deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
 		if nvcuda != nil {
 			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
-			gpuHandles.nvcuda = nvcuda
-			gpuHandles.deviceCount = deviceCount
-			return gpuHandles
+			cHandles.nvcuda = nvcuda
+			cHandles.deviceCount = deviceCount
+			nvcudaLibPath = libPath
+			return cHandles
 		}
 	}
 
-	cudartLibPaths := FindGPULibs(cudartMgmtName, cudartMgmtPatterns)
+	cudartLibPaths := FindGPULibs(CudartMgmtName, cudartMgmtPatterns)
 	if len(cudartLibPaths) > 0 {
 		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
 		if cudart != nil {
 			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
-			gpuHandles.cudart = cudart
-			gpuHandles.deviceCount = deviceCount
-			return gpuHandles
+			cHandles.cudart = cudart
+			cHandles.deviceCount = deviceCount
+			cudartLibPath = libPath
+			return cHandles
 		}
 	}
 
-	return gpuHandles
+	return cHandles
+}
+
+// Note: gpuMutex must already be held
+func initOneAPIHandles() *oneapiHandles {
+	oHandles := &oneapiHandles{}
+
+	// Short-circuit if we already know which library to use
+	if oneapiLibPath != "" {
+		oHandles.deviceCount, oHandles.oneapi, _ = LoadOneapiMgmt([]string{oneapiLibPath})
+		return oHandles
+	}
+
+	oneapiLibPaths := FindGPULibs(OneapiMgmtName, OneapiGlobs)
+	if len(oneapiLibPaths) > 0 {
+		oHandles.deviceCount, oHandles.oneapi, oneapiLibPath = LoadOneapiMgmt(oneapiLibPaths)
+	}
+
+	return oHandles
+}
+
+func GetCPUInfo() GpuInfoList {
+	gpuMutex.Lock()
+	if !bootstrapped {
+		gpuMutex.Unlock()
+		GetGPUInfo()
+	} else {
+		gpuMutex.Unlock()
+	}
+	return GpuInfoList{cpus[0].GpuInfo}
 }
 
 func GetGPUInfo() GpuInfoList {
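
GetCPUInfo above leans on GetGPUInfo for the one-time bootstrap: when discovery has not run yet, it drops the lock, triggers the full scan, and then returns the cached CPU entry. A hedged usage sketch, assuming the package keeps its github.com/ollama/ollama/gpu import path (the caller itself is illustrative, not from this patch):

package main

import (
	"fmt"

	"github.com/ollama/ollama/gpu"
)

func main() {
	// The first call performs discovery and caches the results.
	for _, g := range gpu.GetGPUInfo() {
		fmt.Println(g.Library, g.ID, g.FreeMemory)
	}
	// Cheap afterwards: reuses the bootstrapped CPU entry.
	cpu := gpu.GetCPUInfo()[0]
	fmt.Println("system memory free:", cpu.FreeMemory)
}
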
@@ -160,110 +178,245 @@ func GetGPUInfo() GpuInfoList {
 	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
 	gpuMutex.Lock()
 	defer gpuMutex.Unlock()
-
-	gpuHandles := initGPUHandles()
+	needRefresh := true // the bootstrap pass clears this; later calls only refresh free memory
+	var cHandles *cudaHandles
+	var oHandles *oneapiHandles
 	defer func() {
-		if gpuHandles.cudart != nil {
-			C.cudart_release(*gpuHandles.cudart)
+		if cHandles != nil {
+			if cHandles.cudart != nil {
+				C.cudart_release(*cHandles.cudart)
+			}
+			if cHandles.nvcuda != nil {
+				C.nvcuda_release(*cHandles.nvcuda)
+			}
+			if cHandles.nvml != nil {
+				C.nvml_release(*cHandles.nvml)
+			}
 		}
-		if gpuHandles.nvcuda != nil {
-			C.nvcuda_release(*gpuHandles.nvcuda)
+		if oHandles != nil {
+			if oHandles.oneapi != nil {
+				// TODO - is this needed?
+				C.oneapi_release(*oHandles.oneapi)
+			}
 		}
 	}()
 
-	// All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX
-	cpuVariant := GetCPUVariant()
-	if cpuVariant == "" && runtime.GOARCH == "amd64" {
-		slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.")
-	}
+	if !bootstrapped {
+		slog.Debug("Detecting GPUs")
+		needRefresh = false
+		cpuCapability = GetCPUCapability()
+		var memInfo C.mem_info_t
 
-	// On windows we bundle the nvidia library one level above the runner dir
-	depPath := ""
-	if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
-		depPath = filepath.Dir(envconfig.RunnersDir)
-	}
+		mem, err := GetCPUMem()
+		if err != nil {
+			slog.Warn("error looking up system memory", "error", err)
+		}
+		cpus = []CPUInfo{{
+			GpuInfo: GpuInfo{
+				memInfo: mem,
+				Library: "cpu",
+				Variant: cpuCapability,
+				ID:      "0",
+			},
+		}}
+
+		// Fall back to CPU mode if we're lacking required vector extensions on x86
+		if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
+			slog.Warn("CPU does not have minimum vector extensions, GPU inference disabled", "required", GPURunnerCPUCapability, "detected", cpuCapability)
+			bootstrapped = true
+			// No need to do any GPU discovery, since we can't run on them
+			return GpuInfoList{cpus[0].GpuInfo}
+		}
 
-	var memInfo C.mem_info_t
-	resp := []GpuInfo{}
+		// On windows we bundle the nvidia library one level above the runner dir
+		depPath := ""
+		if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
+			depPath = filepath.Dir(envconfig.RunnersDir)
+		}
 
-	// NVIDIA first
-	for i := range gpuHandles.deviceCount {
-		// TODO once we support CPU compilation variants of GPU libraries refine this...
-		if cpuVariant == "" && runtime.GOARCH == "amd64" {
-			continue
+		// Load ALL libraries
+		cHandles = initCudaHandles()
+
+		// NVIDIA
+		for i := range cHandles.deviceCount {
+			if cHandles.cudart != nil || cHandles.nvcuda != nil {
+				gpuInfo := CudaGPUInfo{
+					GpuInfo: GpuInfo{
+						Library: "cuda",
+					},
+					index: i,
+				}
+				var driverMajor int
+				var driverMinor int
+				if cHandles.cudart != nil {
+					C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
+				} else {
+					C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
+					driverMajor = int(cHandles.nvcuda.driver_major)
+					driverMinor = int(cHandles.nvcuda.driver_minor)
+				}
+				if memInfo.err != nil {
+					slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
+					C.free(unsafe.Pointer(memInfo.err))
+					continue
+				}
+				if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
+					slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
+					continue
+				}
+				gpuInfo.TotalMemory = uint64(memInfo.total)
+				gpuInfo.FreeMemory = uint64(memInfo.free)
+				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
+				gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
+				gpuInfo.MinimumMemory = cudaMinimumMemory
+				gpuInfo.DependencyPath = depPath
+				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
+				gpuInfo.DriverMajor = driverMajor
+				gpuInfo.DriverMinor = driverMinor
+
+				// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
+				cudaGPUs = append(cudaGPUs, gpuInfo)
+			}
 		}
-		if gpuHandles.cudart != nil || gpuHandles.nvcuda != nil {
-			gpuInfo := GpuInfo{
-				Library: "cuda",
+
+		// Intel
+		oHandles = initOneAPIHandles()
+		for d := 0; oHandles.oneapi != nil && d < int(oHandles.oneapi.num_drivers); d++ {
+			if oHandles.oneapi == nil {
+				// shouldn't happen: the loop condition guarantees a non-nil handle
+				slog.Warn("nil oneapi handle with driver count", "count", oHandles.deviceCount)
+				continue
+			}
+			devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
+			for i := range devCount {
+				gpuInfo := OneapiGPUInfo{
+					GpuInfo: GpuInfo{
+						Library: "oneapi",
+					},
+					driverIndex: d,
+					gpuIndex:    int(i),
+				}
+				// TODO - split bootstrapping from updating free memory
+				C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
+				// TODO - convert this to MinimumMemory based on testing...
+				totalFreeMem := float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend
+				memInfo.free = C.uint64_t(totalFreeMem)
+				gpuInfo.TotalMemory = uint64(memInfo.total)
+				gpuInfo.FreeMemory = uint64(memInfo.free)
+				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
+				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
+				// TODO dependency path?
+				oneapiGPUs = append(oneapiGPUs, gpuInfo)
+			}
 			}
-			var driverMajor int
-			var driverMinor int
-			if gpuHandles.cudart != nil {
-				C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo)
+		}
+
+		rocmGPUs = AMDGetGPUInfo()
+		bootstrapped = true
+	}
+
+	// For detected GPUs, load library if not loaded
+
+	// Refresh free memory usage
+	if needRefresh {
+		mem, err := GetCPUMem()
+		if err != nil {
+			slog.Warn("error looking up system memory", "error", err)
+		} else {
+			slog.Debug("updating system memory data",
+				slog.Group(
+					"before",
+					"total", format.HumanBytes2(cpus[0].TotalMemory),
+					"free", format.HumanBytes2(cpus[0].FreeMemory),
+				),
+				slog.Group(
+					"now",
+					"total", format.HumanBytes2(mem.TotalMemory),
+					"free", format.HumanBytes2(mem.FreeMemory),
+				),
+			)
+			cpus[0].FreeMemory = mem.FreeMemory
+		}
+
+		var memInfo C.mem_info_t
+		if cHandles == nil && len(cudaGPUs) > 0 {
+			cHandles = initCudaHandles()
+		}
+		for i, gpu := range cudaGPUs {
+			if cHandles.nvml != nil {
+				C.nvml_get_free(*cHandles.nvml, C.int(gpu.index), &memInfo.free, &memInfo.total, &memInfo.used)
+			} else if cHandles.cudart != nil {
+				C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
+			} else if cHandles.nvcuda != nil {
+				C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
+				memInfo.used = memInfo.total - memInfo.free
 			} else {
-				C.nvcuda_check_vram(*gpuHandles.nvcuda, C.int(i), &memInfo)
-				driverMajor = int(gpuHandles.nvcuda.driver_major)
-				driverMinor = int(gpuHandles.nvcuda.driver_minor)
+				// shouldn't happen
+				slog.Warn("no valid cuda library loaded to refresh vram usage")
+				break
 			}
 			if memInfo.err != nil {
-				slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
+				slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
 				C.free(unsafe.Pointer(memInfo.err))
 				continue
 			}
-			if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
-				slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
+			if memInfo.free == 0 {
+				slog.Warn("error looking up nvidia GPU memory")
 				continue
 			}
-			gpuInfo.TotalMemory = uint64(memInfo.total)
-			gpuInfo.FreeMemory = uint64(memInfo.free)
-			gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
-			gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
-			gpuInfo.MinimumMemory = cudaMinimumMemory
-			gpuInfo.DependencyPath = depPath
-			gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
-			gpuInfo.DriverMajor = driverMajor
-			gpuInfo.DriverMinor = driverMinor
-
-			// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
-			resp = append(resp, gpuInfo)
+			slog.Debug("updating cuda memory data",
+				"gpu", gpu.ID,
+				"name", gpu.Name,
+				slog.Group(
+					"before",
+					"total", format.HumanBytes2(gpu.TotalMemory),
+					"free", format.HumanBytes2(gpu.FreeMemory),
+				),
+				slog.Group(
+					"now",
+					"total", format.HumanBytes2(uint64(memInfo.total)),
+					"free", format.HumanBytes2(uint64(memInfo.free)),
+					"used", format.HumanBytes2(uint64(memInfo.used)),
+				),
+			)
+			cudaGPUs[i].FreeMemory = uint64(memInfo.free)
 		}
-	}
-
-	// Then AMD
-	resp = append(resp, AMDGetGPUInfo()...)
 
-	if len(resp) == 0 {
-		C.cpu_check_ram(&memInfo)
-		if memInfo.err != nil {
-			slog.Info("error looking up CPU memory", "error", C.GoString(memInfo.err))
-			C.free(unsafe.Pointer(memInfo.err))
-			return resp
+		if oHandles == nil && len(oneapiGPUs) > 0 {
+			oHandles = initOneAPIHandles()
 		}
-		gpuInfo := GpuInfo{
-			Library: "cpu",
-			Variant: cpuVariant,
+		for i, gpu := range oneapiGPUs {
+			if oHandles.oneapi == nil {
+				// shouldn't happen
+				slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
+				continue
+			}
+			C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
+			// TODO - convert this to MinimumMemory based on testing...
+			totalFreeMem := float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend
+			memInfo.free = C.uint64_t(totalFreeMem)
+			oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
 		}
-		gpuInfo.TotalMemory = uint64(memInfo.total)
-		gpuInfo.FreeMemory = uint64(memInfo.free)
-		gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
 
-		resp = append(resp, gpuInfo)
+		err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
+		if err != nil {
+			slog.Debug("problem refreshing ROCm free memory", "error", err)
+		}
 	}
 
-	return resp
-}
-
-func GetCPUMem() (memInfo, error) {
-	var ret memInfo
-	var info C.mem_info_t
-	C.cpu_check_ram(&info)
-	if info.err != nil {
-		defer C.free(unsafe.Pointer(info.err))
-		return ret, fmt.Errorf(C.GoString(info.err))
+	resp := []GpuInfo{}
+	for _, gpu := range cudaGPUs {
+		resp = append(resp, gpu.GpuInfo)
+	}
+	for _, gpu := range rocmGPUs {
+		resp = append(resp, gpu.GpuInfo)
+	}
+	for _, gpu := range oneapiGPUs {
+		resp = append(resp, gpu.GpuInfo)
+	}
+	if len(resp) == 0 {
+		resp = append(resp, cpus[0].GpuInfo)
 	}
-	ret.FreeMemory = uint64(info.free)
-	ret.TotalMemory = uint64(info.total)
-	return ret, nil
+	return resp
 }
 
 func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
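
The reworked GetGPUInfo separates the one-time bootstrap (device enumeration, compute capability, driver version) from the per-call refresh, which only re-reads free memory via NVML, cudart, nvcuda, or the ROCm/oneAPI helpers. Callers can therefore poll it cheaply; an illustrative sketch (the polling loop is an assumption, not from this patch):

package main

import (
	"fmt"
	"time"

	"github.com/ollama/ollama/gpu"
)

func main() {
	// Static fields (ID, Name, TotalMemory, Compute) come from the
	// bootstrap pass; FreeMemory is refreshed on every call.
	for i := 0; i < 3; i++ {
		for _, g := range gpu.GetGPUInfo() {
			fmt.Printf("%s[%s] free %d of %d bytes\n", g.Library, g.ID, g.FreeMemory, g.TotalMemory)
		}
		time.Sleep(time.Second)
	}
}
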
@@ -362,8 +515,26 @@ func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
 	return 0, nil, ""
 }
 
+func LoadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string) {
+	var resp C.nvml_init_resp_t
+	resp.ch.verbose = getVerboseState()
+	for _, libPath := range nvmlLibPaths {
+		lib := C.CString(libPath)
+		defer C.free(unsafe.Pointer(lib))
+		C.nvml_init(lib, &resp)
+		if resp.err != nil {
+			slog.Info(fmt.Sprintf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err)))
+			C.free(unsafe.Pointer(resp.err))
+		} else {
+			return &resp.ch, libPath
+		}
+	}
+	return nil, ""
+}
+
 func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
 	var resp C.oneapi_init_resp_t
+	num_devices := 0
 	resp.oh.verbose = getVerboseState()
 	for _, libPath := range oneapiLibPaths {
 		lib := C.CString(libPath)
@@ -373,7 +544,10 @@ func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
 			slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err))
 			C.free(unsafe.Pointer(resp.err))
 		} else {
-			return int(resp.num_devices), &resp.oh, libPath
+			for i := range resp.oh.num_drivers {
+				num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
+			}
+			return num_devices, &resp.oh, libPath
 		}
 	}
 	return 0, nil, ""
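
LoadOneapiMgmt now derives the device total by summing per-driver counts from oneapi_get_device_count instead of trusting a single num_devices field, since one Level Zero installation can expose several drivers. The aggregation in isolation, as a sketch (perDriverCount stands in for the C call, which is not reproduced here):

package main

import "fmt"

// totalDevices sums per-driver device counts, the same aggregation the
// loop over resp.oh.num_drivers performs in LoadOneapiMgmt.
func totalDevices(perDriverCount []int) int {
	total := 0
	for _, n := range perDriverCount {
		total += n
	}
	return total
}

func main() {
	// e.g. two drivers exposing 2 and 1 devices respectively
	fmt.Println(totalDevices([]int{2, 1})) // prints 3
}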