gpu.go 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723
  1. //go:build linux || windows
  2. package gpu
  3. /*
  4. #cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
  5. #cgo windows LDFLAGS: -lpthread
  6. #include "gpu_info.h"
  7. */
  8. import "C"
  9. import (
  10. "bufio"
  11. "bytes"
  12. "fmt"
  13. "log/slog"
  14. "os"
  15. "path/filepath"
  16. "runtime"
  17. "strings"
  18. "sync"
  19. "unsafe"
  20. "github.com/ollama/ollama/envconfig"
  21. "github.com/ollama/ollama/format"
  22. )
// cudaHandles aggregates the NVIDIA management library handles opened
// during discovery. A nil field means that particular library was not
// (or could not be) loaded; deviceCount reflects the count reported by
// whichever bootstrap library succeeded.
type cudaHandles struct {
	deviceCount int
	cudart      *C.cudart_handle_t
	nvcuda      *C.nvcuda_handle_t
	nvml        *C.nvml_handle_t
}
// oneapiHandles holds the Intel oneAPI management handle and the total
// device count summed across all drivers. oneapi is nil when the
// library could not be loaded.
type oneapiHandles struct {
	oneapi      *C.oneapi_handle_t
	deviceCount int
}
const (
	// Minimum VRAM reserved per GPU for runtime overhead before model
	// layers are scheduled onto it (see MinimumMemory assignments below).
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
	// TODO OneAPI minimum memory
)
var (
	// gpuMutex guards all of the discovery state in this block.
	gpuMutex sync.Mutex
	// bootstrapped is set once full device discovery has completed;
	// later GetGPUInfo calls only refresh free-memory figures.
	bootstrapped  bool
	cpuCapability CPUCapability
	cpus          []CPUInfo
	cudaGPUs      []CudaGPUInfo
	// Cached library paths so subsequent handle (re)initialization can
	// skip the filesystem glob search.
	nvcudaLibPath string
	cudartLibPath string
	oneapiLibPath string
	nvmlLibPath   string
	rocmGPUs      []RocmGPUInfo
	oneapiGPUs    []OneapiGPUInfo
)
// With our current CUDA compile flags, older than 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0}

// RocmComputeMin is the minimum ROCm gfx major version accepted.
var RocmComputeMin = 9

// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512M is what they typically report, so anything less than 1G must be iGPU — NOTE(review): original comment said "512G", presumed typo; confirm
// CudartLinuxGlobs lists glob patterns searched for the CUDA runtime
// library (libcudart) on Linux, covering common distro install paths,
// WSL, and aarch64 (Jetson) locations. Searched in order by FindGPULibs.
var CudartLinuxGlobs = []string{
	"/usr/local/cuda/lib64/libcudart.so*",
	"/usr/lib/x86_64-linux-gnu/nvidia/current/libcudart.so*",
	"/usr/lib/x86_64-linux-gnu/libcudart.so*",
	"/usr/lib/wsl/lib/libcudart.so*",
	"/usr/lib/wsl/drivers/*/libcudart.so*",
	"/opt/cuda/lib64/libcudart.so*",
	"/usr/local/cuda*/targets/aarch64-linux/lib/libcudart.so*",
	"/usr/lib/aarch64-linux-gnu/nvidia/current/libcudart.so*",
	"/usr/lib/aarch64-linux-gnu/libcudart.so*",
	"/usr/local/cuda/lib*/libcudart.so*",
	"/usr/lib*/libcudart.so*",
	"/usr/local/lib*/libcudart.so*",
}
// CudartWindowsGlobs lists glob patterns for the CUDA runtime DLL shipped
// with the CUDA toolkit on Windows.
var CudartWindowsGlobs = []string{
	"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
}

// NvmlWindowsGlobs locates the NVIDIA management library (nvml), used on
// Windows only to refresh free-memory readings.
var NvmlWindowsGlobs = []string{
	"c:\\Windows\\System32\\nvml.dll",
}

// NvcudaLinuxGlobs lists glob patterns for the driver-provided libcuda
// on Linux (driver libraries cannot be bundled as payloads).
var NvcudaLinuxGlobs = []string{
	"/usr/local/cuda*/targets/*/lib/libcuda.so*",
	"/usr/lib/*-linux-gnu/nvidia/current/libcuda.so*",
	"/usr/lib/*-linux-gnu/libcuda.so*",
	"/usr/lib/wsl/lib/libcuda.so*",
	"/usr/lib/wsl/drivers/*/libcuda.so*",
	"/opt/cuda/lib*/libcuda.so*",
	"/usr/local/cuda/lib*/libcuda.so*",
	"/usr/lib*/libcuda.so*",
	"/usr/local/lib*/libcuda.so*",
}

// NvcudaWindowsGlobs locates the driver-provided nvcuda DLL on Windows.
var NvcudaWindowsGlobs = []string{
	"c:\\windows\\system*\\nvcuda.dll",
}

// OneapiWindowsGlobs locates the Intel Level Zero GPU driver DLL on Windows.
var OneapiWindowsGlobs = []string{
	"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
}

// OneapiLinuxGlobs locates the Intel Level Zero GPU driver library on Linux.
var OneapiLinuxGlobs = []string{
	"/usr/lib/x86_64-linux-gnu/libze_intel_gpu.so*",
	"/usr/lib*/libze_intel_gpu.so*",
}

// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra string = os.Getenv("JETSON_JETPACK")
// initCudaHandles locates and loads the NVIDIA management libraries,
// preferring (in order) a previously cached path, nvcuda (driver API),
// then cudart (runtime API). On Windows it additionally loads nvml for
// free-memory refreshes. Successful library paths are cached in the
// package-level *LibPath variables so later calls short-circuit.
//
// Note: gpuMutex must already be held
func initCudaHandles() *cudaHandles {
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing

	cHandles := &cudaHandles{}
	// Short Circuit if we already know which library to use
	if nvmlLibPath != "" {
		cHandles.nvml, _ = LoadNVMLMgmt([]string{nvmlLibPath})
		return cHandles
	}
	if nvcudaLibPath != "" {
		cHandles.deviceCount, cHandles.nvcuda, _ = LoadNVCUDAMgmt([]string{nvcudaLibPath})
		return cHandles
	}
	if cudartLibPath != "" {
		cHandles.deviceCount, cHandles.cudart, _ = LoadCUDARTMgmt([]string{cudartLibPath})
		return cHandles
	}

	slog.Debug("searching for GPU discovery libraries for NVIDIA")
	var cudartMgmtName string
	var cudartMgmtPatterns []string
	var nvcudaMgmtName string
	var nvcudaMgmtPatterns []string
	var nvmlMgmtName string
	var nvmlMgmtPatterns []string

	// Error intentionally ignored: an empty tmpDir simply skips the
	// payload-directory glob below.
	tmpDir, _ := PayloadsDir()
	switch runtime.GOOS {
	case "windows":
		cudartMgmtName = "cudart64_*.dll"
		localAppData := os.Getenv("LOCALAPPDATA")
		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", cudartMgmtName)}
		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartWindowsGlobs...)
		// Aligned with driver, we can't carry as payloads
		nvcudaMgmtName = "nvcuda.dll"
		nvcudaMgmtPatterns = NvcudaWindowsGlobs

		// Use nvml to refresh free memory on windows only
		nvmlMgmtName = "nvml.dll"
		// Copy so appends elsewhere can never mutate the package-level glob list.
		nvmlMgmtPatterns = make([]string, len(NvmlWindowsGlobs))
		copy(nvmlMgmtPatterns, NvmlWindowsGlobs)
	case "linux":
		cudartMgmtName = "libcudart.so*"
		if tmpDir != "" {
			// TODO - add "payloads" for subprocess
			cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", cudartMgmtName)}
		}
		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
		// Aligned with driver, we can't carry as payloads
		nvcudaMgmtName = "libcuda.so*"
		nvcudaMgmtPatterns = NvcudaLinuxGlobs

		// nvml omitted on linux
	default:
		// Unsupported OS: return empty handles (no GPUs discovered).
		return cHandles
	}

	if len(nvmlMgmtPatterns) > 0 {
		nvmlLibPaths := FindGPULibs(nvmlMgmtName, nvmlMgmtPatterns)
		if len(nvmlLibPaths) > 0 {
			nvml, libPath := LoadNVMLMgmt(nvmlLibPaths)
			if nvml != nil {
				slog.Debug("nvidia-ml loaded", "library", libPath)
				cHandles.nvml = nvml
				nvmlLibPath = libPath
			}
		}
	}

	// Prefer the driver API (nvcuda) over the runtime API (cudart):
	// it also reports the driver version used below in GetGPUInfo.
	nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
	if len(nvcudaLibPaths) > 0 {
		deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
		if nvcuda != nil {
			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
			cHandles.nvcuda = nvcuda
			cHandles.deviceCount = deviceCount
			nvcudaLibPath = libPath
			return cHandles
		}
	}

	cudartLibPaths := FindGPULibs(cudartMgmtName, cudartMgmtPatterns)
	if len(cudartLibPaths) > 0 {
		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
		if cudart != nil {
			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
			cHandles.cudart = cudart
			cHandles.deviceCount = deviceCount
			cudartLibPath = libPath
			return cHandles
		}
	}
	return cHandles
}
  187. // Note: gpuMutex must already be held
  188. func initOneAPIHandles() *oneapiHandles {
  189. oHandles := &oneapiHandles{}
  190. var oneapiMgmtName string
  191. var oneapiMgmtPatterns []string
  192. // Short Circuit if we already know which library to use
  193. if oneapiLibPath != "" {
  194. oHandles.deviceCount, oHandles.oneapi, _ = LoadOneapiMgmt([]string{oneapiLibPath})
  195. return oHandles
  196. }
  197. switch runtime.GOOS {
  198. case "windows":
  199. oneapiMgmtName = "ze_intel_gpu64.dll"
  200. oneapiMgmtPatterns = OneapiWindowsGlobs
  201. case "linux":
  202. oneapiMgmtName = "libze_intel_gpu.so"
  203. oneapiMgmtPatterns = OneapiLinuxGlobs
  204. default:
  205. return oHandles
  206. }
  207. oneapiLibPaths := FindGPULibs(oneapiMgmtName, oneapiMgmtPatterns)
  208. if len(oneapiLibPaths) > 0 {
  209. oHandles.deviceCount, oHandles.oneapi, oneapiLibPath = LoadOneapiMgmt(oneapiLibPaths)
  210. }
  211. return oHandles
  212. }
  213. func GetCPUInfo() GpuInfoList {
  214. gpuMutex.Lock()
  215. if !bootstrapped {
  216. gpuMutex.Unlock()
  217. GetGPUInfo()
  218. } else {
  219. gpuMutex.Unlock()
  220. }
  221. return GpuInfoList{cpus[0].GpuInfo}
  222. }
// GetGPUInfo returns the list of usable compute devices. The first call
// performs full discovery (CPU capability, CUDA, oneAPI, ROCm) and caches
// the results; subsequent calls only refresh free-memory figures. When no
// GPU is usable, the single CPU device is returned instead.
func GetGPUInfo() GpuInfoList {
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
	needRefresh := true
	var cHandles *cudaHandles
	var oHandles *oneapiHandles
	// Release any library handles opened during this call, whether we
	// bootstrapped or refreshed.
	defer func() {
		if cHandles != nil {
			if cHandles.cudart != nil {
				C.cudart_release(*cHandles.cudart)
			}
			if cHandles.nvcuda != nil {
				C.nvcuda_release(*cHandles.nvcuda)
			}
			if cHandles.nvml != nil {
				C.nvml_release(*cHandles.nvml)
			}
		}
		if oHandles != nil {
			if oHandles.oneapi != nil {
				// TODO - is this needed?
				C.oneapi_release(*oHandles.oneapi)
			}
		}
	}()

	if !bootstrapped {
		slog.Debug("Detecting GPUs")
		// Bootstrap just queried fresh values, so skip the refresh pass below.
		needRefresh = false
		cpuCapability = getCPUCapability()
		var memInfo C.mem_info_t
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		}
		// cpus always has exactly one entry, the synthetic "cpu" device.
		cpus = []CPUInfo{CPUInfo{
			GpuInfo: GpuInfo{
				memInfo: mem,
				Library: "cpu",
				Variant: cpuCapability.ToVariant(),
				ID:      "0",
			},
		}}

		// Fallback to CPU mode if we're lacking required vector extensions on x86
		if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
			slog.Warn("CPU does not have minimum vector extensions, GPU inference disabled", "required", GPURunnerCPUCapability.ToString(), "detected", cpuCapability.ToString())
			bootstrapped = true
			// No need to do any GPU discovery, since we can't run on them
			return GpuInfoList{cpus[0].GpuInfo}
		}

		// On windows we bundle the nvidia library one level above the runner dir
		depPath := ""
		if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
			depPath = filepath.Dir(envconfig.RunnersDir)
		}

		// Load ALL libraries
		cHandles = initCudaHandles()

		// NVIDIA
		for i := range cHandles.deviceCount {
			if cHandles.cudart != nil || cHandles.nvcuda != nil {
				gpuInfo := CudaGPUInfo{
					GpuInfo: GpuInfo{
						Library: "cuda",
					},
					index: i,
				}
				var driverMajor int
				var driverMinor int
				if cHandles.cudart != nil {
					C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
				} else {
					// Only the driver API (nvcuda) reports the driver version.
					C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
					driverMajor = int(cHandles.nvcuda.driver_major)
					driverMinor = int(cHandles.nvcuda.driver_minor)
				}
				if memInfo.err != nil {
					slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
					// The C side allocated the error string; free it here.
					C.free(unsafe.Pointer(memInfo.err))
					continue
				}
				// Skip GPUs below the minimum supported compute capability.
				if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
					slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
					continue
				}
				gpuInfo.TotalMemory = uint64(memInfo.total)
				gpuInfo.FreeMemory = uint64(memInfo.free)
				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
				gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
				gpuInfo.MinimumMemory = cudaMinimumMemory
				gpuInfo.DependencyPath = depPath
				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
				gpuInfo.DriverMajor = int(driverMajor)
				gpuInfo.DriverMinor = int(driverMinor)

				// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
				cudaGPUs = append(cudaGPUs, gpuInfo)
			}
		}

		// Intel
		oHandles = initOneAPIHandles()
		for d := 0; oHandles.oneapi != nil && d < int(oHandles.oneapi.num_drivers); d++ {
			if oHandles.oneapi == nil {
				// shouldn't happen
				slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
				continue
			}
			devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
			for i := 0; i < int(devCount); i++ {
				gpuInfo := OneapiGPUInfo{
					GpuInfo: GpuInfo{
						Library: "oneapi",
					},
					driverIndex: d,
					gpuIndex:    i,
				}
				// TODO - split bootstrapping from updating free memory
				C.oneapi_check_vram(*oHandles.oneapi, C.int(d), C.int(i), &memInfo)
				// TODO - convert this to MinimumMemory based on testing...
				var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
				memInfo.free = C.uint64_t(totalFreeMem)
				gpuInfo.TotalMemory = uint64(memInfo.total)
				gpuInfo.FreeMemory = uint64(memInfo.free)
				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
				// TODO dependency path?
				oneapiGPUs = append(oneapiGPUs, gpuInfo)
			}
		}

		rocmGPUs = AMDGetGPUInfo()
		bootstrapped = true
	}

	// For detected GPUs, load library if not loaded

	// Refresh free memory usage
	if needRefresh {
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		} else {
			slog.Debug("updating system memory data",
				slog.Group(
					"before",
					"total", format.HumanBytes2(cpus[0].TotalMemory),
					"free", format.HumanBytes2(cpus[0].FreeMemory),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(mem.TotalMemory),
					"free", format.HumanBytes2(mem.FreeMemory),
				),
			)
			cpus[0].FreeMemory = mem.FreeMemory
		}

		var memInfo C.mem_info_t
		// Handles were released at the end of the bootstrap call; reopen
		// them (from the cached library paths) if we have GPUs to poll.
		if cHandles == nil && len(cudaGPUs) > 0 {
			cHandles = initCudaHandles()
		}
		for i, gpu := range cudaGPUs {
			// Prefer nvml (windows), then cudart, then nvcuda for the refresh.
			if cHandles.nvml != nil {
				C.nvml_get_free(*cHandles.nvml, C.int(gpu.index), &memInfo.free, &memInfo.total, &memInfo.used)
			} else if cHandles.cudart != nil {
				C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
			} else if cHandles.nvcuda != nil {
				C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
				memInfo.used = memInfo.total - memInfo.free
			} else {
				// shouldn't happen
				slog.Warn("no valid cuda library loaded to refresh vram usage")
				break
			}
			if memInfo.err != nil {
				slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
				C.free(unsafe.Pointer(memInfo.err))
				continue
			}
			// Treat a zero free reading as a lookup failure and keep the
			// previous value rather than scheduling against 0 bytes.
			if memInfo.free == 0 {
				slog.Warn("error looking up nvidia GPU memory")
				continue
			}
			slog.Debug("updating cuda memory data",
				"gpu", gpu.ID,
				"name", gpu.Name,
				slog.Group(
					"before",
					"total", format.HumanBytes2(gpu.TotalMemory),
					"free", format.HumanBytes2(gpu.FreeMemory),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(uint64(memInfo.total)),
					"free", format.HumanBytes2(uint64(memInfo.free)),
					"used", format.HumanBytes2(uint64(memInfo.used)),
				),
			)
			cudaGPUs[i].FreeMemory = uint64(memInfo.free)
		}

		if oHandles == nil && len(oneapiGPUs) > 0 {
			oHandles = initOneAPIHandles()
		}
		for i, gpu := range oneapiGPUs {
			if oHandles.oneapi == nil {
				// shouldn't happen
				slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
				continue
			}
			C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
			// TODO - convert this to MinimumMemory based on testing...
			var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
			memInfo.free = C.uint64_t(totalFreeMem)
			oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
		}

		err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
		if err != nil {
			slog.Debug("problem refreshing ROCm free memory", "error", err)
		}
	}

	// Flatten all discovered GPUs into the response; fall back to the
	// CPU device when none are usable.
	resp := []GpuInfo{}
	for _, gpu := range cudaGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range rocmGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range oneapiGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	if len(resp) == 0 {
		resp = append(resp, cpus[0].GpuInfo)
	}
	return resp
}
  453. func GetCPUMem() (memInfo, error) {
  454. if runtime.GOOS == "linux" {
  455. return GetLinuxMemInfo()
  456. }
  457. var ret memInfo
  458. var info C.mem_info_t
  459. C.cpu_check_ram(&info)
  460. if info.err != nil {
  461. defer C.free(unsafe.Pointer(info.err))
  462. return ret, fmt.Errorf(C.GoString(info.err))
  463. }
  464. ret.FreeMemory = uint64(info.free)
  465. ret.TotalMemory = uint64(info.total)
  466. return ret, nil
  467. }
  468. func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
  469. // Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
  470. var ldPaths []string
  471. var patterns []string
  472. gpuLibPaths := []string{}
  473. slog.Debug("Searching for GPU library", "name", baseLibName)
  474. switch runtime.GOOS {
  475. case "windows":
  476. ldPaths = strings.Split(os.Getenv("PATH"), ";")
  477. case "linux":
  478. ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
  479. default:
  480. return gpuLibPaths
  481. }
  482. // Start with whatever we find in the PATH/LD_LIBRARY_PATH
  483. for _, ldPath := range ldPaths {
  484. d, err := filepath.Abs(ldPath)
  485. if err != nil {
  486. continue
  487. }
  488. patterns = append(patterns, filepath.Join(d, baseLibName+"*"))
  489. }
  490. patterns = append(patterns, defaultPatterns...)
  491. slog.Debug("gpu library search", "globs", patterns)
  492. for _, pattern := range patterns {
  493. // Nvidia PhysX known to return bogus results
  494. if strings.Contains(pattern, "PhysX") {
  495. slog.Debug("skipping PhysX cuda library path", "path", pattern)
  496. continue
  497. }
  498. // Ignore glob discovery errors
  499. matches, _ := filepath.Glob(pattern)
  500. for _, match := range matches {
  501. // Resolve any links so we don't try the same lib multiple times
  502. // and weed out any dups across globs
  503. libPath := match
  504. tmp := match
  505. var err error
  506. for ; err == nil; tmp, err = os.Readlink(libPath) {
  507. if !filepath.IsAbs(tmp) {
  508. tmp = filepath.Join(filepath.Dir(libPath), tmp)
  509. }
  510. libPath = tmp
  511. }
  512. new := true
  513. for _, cmp := range gpuLibPaths {
  514. if cmp == libPath {
  515. new = false
  516. break
  517. }
  518. }
  519. if new {
  520. gpuLibPaths = append(gpuLibPaths, libPath)
  521. }
  522. }
  523. }
  524. slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
  525. return gpuLibPaths
  526. }
  527. func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
  528. var resp C.cudart_init_resp_t
  529. resp.ch.verbose = getVerboseState()
  530. for _, libPath := range cudartLibPaths {
  531. lib := C.CString(libPath)
  532. defer C.free(unsafe.Pointer(lib))
  533. C.cudart_init(lib, &resp)
  534. if resp.err != nil {
  535. slog.Debug("Unable to load cudart", "library", libPath, "error", C.GoString(resp.err))
  536. C.free(unsafe.Pointer(resp.err))
  537. } else {
  538. return int(resp.num_devices), &resp.ch, libPath
  539. }
  540. }
  541. return 0, nil, ""
  542. }
  543. func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
  544. var resp C.nvcuda_init_resp_t
  545. resp.ch.verbose = getVerboseState()
  546. for _, libPath := range nvcudaLibPaths {
  547. lib := C.CString(libPath)
  548. defer C.free(unsafe.Pointer(lib))
  549. C.nvcuda_init(lib, &resp)
  550. if resp.err != nil {
  551. slog.Debug("Unable to load nvcuda", "library", libPath, "error", C.GoString(resp.err))
  552. C.free(unsafe.Pointer(resp.err))
  553. } else {
  554. return int(resp.num_devices), &resp.ch, libPath
  555. }
  556. }
  557. return 0, nil, ""
  558. }
  559. func LoadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string) {
  560. var resp C.nvml_init_resp_t
  561. resp.ch.verbose = getVerboseState()
  562. for _, libPath := range nvmlLibPaths {
  563. lib := C.CString(libPath)
  564. defer C.free(unsafe.Pointer(lib))
  565. C.nvml_init(lib, &resp)
  566. if resp.err != nil {
  567. slog.Info(fmt.Sprintf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err)))
  568. C.free(unsafe.Pointer(resp.err))
  569. } else {
  570. return &resp.ch, libPath
  571. }
  572. }
  573. return nil, ""
  574. }
  575. func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
  576. var resp C.oneapi_init_resp_t
  577. num_devices := 0
  578. resp.oh.verbose = getVerboseState()
  579. for _, libPath := range oneapiLibPaths {
  580. lib := C.CString(libPath)
  581. defer C.free(unsafe.Pointer(lib))
  582. C.oneapi_init(lib, &resp)
  583. if resp.err != nil {
  584. slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err))
  585. C.free(unsafe.Pointer(resp.err))
  586. } else {
  587. for i := 0; i < int(resp.oh.num_drivers); i++ {
  588. num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
  589. }
  590. return num_devices, &resp.oh, libPath
  591. }
  592. }
  593. return 0, nil, ""
  594. }
  595. func getVerboseState() C.uint16_t {
  596. if envconfig.Debug {
  597. return C.uint16_t(1)
  598. }
  599. return C.uint16_t(0)
  600. }
  601. // Given the list of GPUs this instantiation is targeted for,
  602. // figure out the visible devices environment variable
  603. //
  604. // If different libraries are detected, the first one is what we use
  605. func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
  606. if len(l) == 0 {
  607. return "", ""
  608. }
  609. switch l[0].Library {
  610. case "cuda":
  611. return cudaGetVisibleDevicesEnv(l)
  612. case "rocm":
  613. return rocmGetVisibleDevicesEnv(l)
  614. case "oneapi":
  615. return oneapiGetVisibleDevicesEnv(l)
  616. default:
  617. slog.Debug("no filter required for library " + l[0].Library)
  618. return "", ""
  619. }
  620. }
  621. func GetLinuxMemInfo() (memInfo, error) {
  622. var mem memInfo
  623. var total, available, free, buffers, cached uint64
  624. f, err := os.Open("/proc/meminfo")
  625. if err != nil {
  626. return mem, err
  627. }
  628. defer f.Close()
  629. s := bufio.NewScanner(f)
  630. for s.Scan() {
  631. switch {
  632. case bytes.HasPrefix(s.Bytes(), []byte(`MemTotal:`)):
  633. _, err = fmt.Sscanf(s.Text(), "MemTotal:%d", &total)
  634. case bytes.HasPrefix(s.Bytes(), []byte(`MemAvailable:`)):
  635. _, err = fmt.Sscanf(s.Text(), "MemAvailable:%d", &available)
  636. case bytes.HasPrefix(s.Bytes(), []byte(`MemFree:`)):
  637. _, err = fmt.Sscanf(s.Text(), "MemFree:%d", &free)
  638. case bytes.HasPrefix(s.Bytes(), []byte(`Buffers:`)):
  639. _, err = fmt.Sscanf(s.Text(), "Buffers:%d", &buffers)
  640. case bytes.HasPrefix(s.Bytes(), []byte(`Cached:`)):
  641. _, err = fmt.Sscanf(s.Text(), "Cached:%d", &cached)
  642. default:
  643. continue
  644. }
  645. if err != nil {
  646. return mem, err
  647. }
  648. if total > 0 && available > 0 {
  649. mem.TotalMemory = total * 1024
  650. mem.FreeMemory = available * 1024
  651. return mem, nil
  652. }
  653. }
  654. mem.TotalMemory = total * 1024
  655. mem.FreeMemory = (free + buffers + cached) * 1024
  656. return mem, nil
  657. }