gpu.go 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719
  1. //go:build linux || windows
  2. package discover
  3. /*
  4. #cgo CPPFLAGS: -O3
  5. #cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
  6. #cgo windows LDFLAGS: -lpthread
  7. #include "gpu_info.h"
  8. */
  9. import "C"
  10. import (
  11. "fmt"
  12. "log/slog"
  13. "os"
  14. "path/filepath"
  15. "runtime"
  16. "strconv"
  17. "strings"
  18. "sync"
  19. "unsafe"
  20. "github.com/ollama/ollama/envconfig"
  21. "github.com/ollama/ollama/format"
  22. )
// cudaHandles bundles handles to the NVIDIA native discovery libraries.
// During discovery at most one of cudart/nvcuda is used to enumerate and
// bootstrap devices, while nvml (when loaded) is used for free-memory
// queries. A field is nil when the corresponding library failed to load.
type cudaHandles struct {
	deviceCount int                // device count reported by the loaded driver/runtime library
	cudart      *C.cudart_handle_t // CUDA runtime library handle
	nvcuda      *C.nvcuda_handle_t // CUDA driver library handle
	nvml        *C.nvml_handle_t   // NVIDIA management library handle
}
// oneapiHandles bundles the Intel oneAPI library handle and the total
// device count it reported; oneapi is nil when the library could not be
// loaded.
type oneapiHandles struct {
	oneapi      *C.oneapi_handle_t
	deviceCount int
}
const (
	// VRAM reserve recorded as MinimumMemory on each discovered device
	// before it is offered for scheduling.
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
	// TODO OneAPI minimum memory
)
// Package-level discovery state. gpuMutex must be held when reading or
// writing any of the variables below.
var (
	gpuMutex     sync.Mutex
	bootstrapped bool // set once the initial full discovery pass completes
	cpus         []CPUInfo
	cudaGPUs     []CudaGPUInfo
	// Library paths resolved during bootstrap, cached so later calls can
	// reload the same library directly without searching again.
	nvcudaLibPath string
	cudartLibPath string
	oneapiLibPath string
	nvmlLibPath   string
	rocmGPUs      []RocmGPUInfo
	oneapiGPUs    []OneapiGPUInfo

	// If any discovered GPUs are incompatible, report why
	unsupportedGPUs []UnsupportedGPUInfo

	// Keep track of errors during bootstrapping so that if GPUs that were
	// expected to be present are missing, this may explain why
	bootstrapErrors []error
)
// With our current CUDA compile flags, older than 5.0 will not work properly
// (string values used to allow ldflags overrides at build time)
var (
	CudaComputeMajorMin = "5"
	CudaComputeMinorMin = "0"
)

// Minimum supported ROCm compute (gfx) major version, as a string for the
// same ldflags-override reason as the CUDA values above.
var RocmComputeMajorMin = "9"

// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU
// Note: gpuMutex must already be held
//
// initCudaHandles loads whichever NVIDIA libraries can be found — NVML
// for management data, then the CUDA driver, then the CUDA runtime as a
// fallback — and returns the handles plus the device count. Paths found
// here are cached in package globals so subsequent calls reload directly.
func initCudaHandles() *cudaHandles {
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing

	cHandles := &cudaHandles{}
	// Short Circuit if we already know which library to use
	// ignore bootstrap errors in this case since we already recorded them
	if nvmlLibPath != "" {
		cHandles.nvml, _, _ = loadNVMLMgmt([]string{nvmlLibPath})
		return cHandles
	}
	if nvcudaLibPath != "" {
		cHandles.deviceCount, cHandles.nvcuda, _, _ = loadNVCUDAMgmt([]string{nvcudaLibPath})
		return cHandles
	}
	if cudartLibPath != "" {
		cHandles.deviceCount, cHandles.cudart, _, _ = loadCUDARTMgmt([]string{cudartLibPath})
		return cHandles
	}

	slog.Debug("searching for GPU discovery libraries for NVIDIA")
	var cudartMgmtPatterns []string

	// Aligned with driver, we can't carry as payloads
	nvcudaMgmtPatterns := NvcudaGlobs
	// Prefer our bundled cudart over system copies.
	cudartMgmtPatterns = append(cudartMgmtPatterns, filepath.Join(LibOllamaPath, "cuda_v*", CudartMgmtName))
	cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)

	// NVML supplements the driver/runtime (free-memory queries) but cannot
	// bootstrap devices by itself, so discovery continues after loading it.
	if len(NvmlGlobs) > 0 {
		nvmlLibPaths := FindGPULibs(NvmlMgmtName, NvmlGlobs)
		if len(nvmlLibPaths) > 0 {
			nvml, libPath, err := loadNVMLMgmt(nvmlLibPaths)
			if nvml != nil {
				slog.Debug("nvidia-ml loaded", "library", libPath)
				cHandles.nvml = nvml
				nvmlLibPath = libPath
			}
			if err != nil {
				bootstrapErrors = append(bootstrapErrors, err)
			}
		}
	}

	// Try the CUDA driver library first; success short-circuits the search.
	nvcudaLibPaths := FindGPULibs(NvcudaMgmtName, nvcudaMgmtPatterns)
	if len(nvcudaLibPaths) > 0 {
		deviceCount, nvcuda, libPath, err := loadNVCUDAMgmt(nvcudaLibPaths)
		if nvcuda != nil {
			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
			cHandles.nvcuda = nvcuda
			cHandles.deviceCount = deviceCount
			nvcudaLibPath = libPath
			return cHandles
		}
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
	}

	// Fall back to the CUDA runtime library.
	cudartLibPaths := FindGPULibs(CudartMgmtName, cudartMgmtPatterns)
	if len(cudartLibPaths) > 0 {
		deviceCount, cudart, libPath, err := loadCUDARTMgmt(cudartLibPaths)
		if cudart != nil {
			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
			cHandles.cudart = cudart
			cHandles.deviceCount = deviceCount
			cudartLibPath = libPath
			return cHandles
		}
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
	}

	return cHandles
}
  132. // Note: gpuMutex must already be held
  133. func initOneAPIHandles() *oneapiHandles {
  134. oHandles := &oneapiHandles{}
  135. // Short Circuit if we already know which library to use
  136. // ignore bootstrap errors in this case since we already recorded them
  137. if oneapiLibPath != "" {
  138. oHandles.deviceCount, oHandles.oneapi, _, _ = loadOneapiMgmt([]string{oneapiLibPath})
  139. return oHandles
  140. }
  141. oneapiLibPaths := FindGPULibs(OneapiMgmtName, OneapiGlobs)
  142. if len(oneapiLibPaths) > 0 {
  143. var err error
  144. oHandles.deviceCount, oHandles.oneapi, oneapiLibPath, err = loadOneapiMgmt(oneapiLibPaths)
  145. if err != nil {
  146. bootstrapErrors = append(bootstrapErrors, err)
  147. }
  148. }
  149. return oHandles
  150. }
  151. func GetCPUInfo() GpuInfoList {
  152. gpuMutex.Lock()
  153. if !bootstrapped {
  154. gpuMutex.Unlock()
  155. GetGPUInfo()
  156. } else {
  157. gpuMutex.Unlock()
  158. }
  159. return GpuInfoList{cpus[0].GpuInfo}
  160. }
// GetGPUInfo returns the list of usable GPUs, running the full discovery
// bootstrap exactly once and, on later calls, refreshing the free-memory
// figures of the devices already found. When no GPU is usable the CPU
// entry is returned instead.
func GetGPUInfo() GpuInfoList {
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
	needRefresh := true
	var cHandles *cudaHandles
	var oHandles *oneapiHandles
	// Release whatever native library handles this call opened.
	defer func() {
		if cHandles != nil {
			if cHandles.cudart != nil {
				C.cudart_release(*cHandles.cudart)
			}
			if cHandles.nvcuda != nil {
				C.nvcuda_release(*cHandles.nvcuda)
			}
			if cHandles.nvml != nil {
				C.nvml_release(*cHandles.nvml)
			}
		}
		if oHandles != nil {
			if oHandles.oneapi != nil {
				// TODO - is this needed?
				C.oneapi_release(*oHandles.oneapi)
			}
		}
	}()

	// ---- One-time bootstrap: enumerate all GPUs ----
	if !bootstrapped {
		slog.Info("looking for compatible GPUs")
		// Minimum compute capability comes from ldflags-overridable strings.
		cudaComputeMajorMin, err := strconv.Atoi(CudaComputeMajorMin)
		if err != nil {
			slog.Error("invalid CudaComputeMajorMin setting", "value", CudaComputeMajorMin, "error", err)
		}
		cudaComputeMinorMin, err := strconv.Atoi(CudaComputeMinorMin)
		if err != nil {
			slog.Error("invalid CudaComputeMinorMin setting", "value", CudaComputeMinorMin, "error", err)
		}
		bootstrapErrors = []error{}
		needRefresh = false
		var memInfo C.mem_info_t
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		}

		details, err := GetCPUDetails()
		if err != nil {
			slog.Warn("failed to lookup CPU details", "error", err)
		}
		// The CPU entry always exists at index 0; GetCPUInfo relies on this.
		cpus = []CPUInfo{
			{
				GpuInfo: GpuInfo{
					memInfo: mem,
					Library: "cpu",
					ID:      "0",
				},
				CPUs: details,
			},
		}

		// Load ALL libraries
		cHandles = initCudaHandles()

		// NVIDIA
		for i := range cHandles.deviceCount {
			if cHandles.cudart != nil || cHandles.nvcuda != nil {
				gpuInfo := CudaGPUInfo{
					GpuInfo: GpuInfo{
						Library: "cuda",
					},
					index: i,
				}
				var driverMajor int
				var driverMinor int
				// Bootstrap via whichever library loaded; only nvcuda
				// exposes the driver version.
				if cHandles.cudart != nil {
					C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
				} else {
					C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
					driverMajor = int(cHandles.nvcuda.driver_major)
					driverMinor = int(cHandles.nvcuda.driver_minor)
				}
				if memInfo.err != nil {
					slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
					C.free(unsafe.Pointer(memInfo.err))
					continue
				}
				gpuInfo.TotalMemory = uint64(memInfo.total)
				gpuInfo.FreeMemory = uint64(memInfo.free)
				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
				gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
				gpuInfo.computeMajor = int(memInfo.major)
				gpuInfo.computeMinor = int(memInfo.minor)
				gpuInfo.MinimumMemory = cudaMinimumMemory
				gpuInfo.DriverMajor = driverMajor
				gpuInfo.DriverMinor = driverMinor
				variant := cudaVariant(gpuInfo)

				// Start with our bundled libraries
				if variant != "" {
					variantPath := filepath.Join(LibOllamaPath, "cuda_"+variant)
					if _, err := os.Stat(variantPath); err == nil {
						// Put the variant directory first in the search path to avoid runtime linking to the wrong library
						gpuInfo.DependencyPath = append([]string{variantPath}, gpuInfo.DependencyPath...)
					}
				}
				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
				gpuInfo.Variant = variant

				// Reject devices below the minimum compute capability but
				// keep a record so GetSystemInfo can report them.
				if int(memInfo.major) < cudaComputeMajorMin || (int(memInfo.major) == cudaComputeMajorMin && int(memInfo.minor) < cudaComputeMinorMin) {
					unsupportedGPUs = append(unsupportedGPUs,
						UnsupportedGPUInfo{
							GpuInfo: gpuInfo.GpuInfo,
						})
					slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
					continue
				}

				// query the management library as well so we can record any skew between the two
				// which represents overhead on the GPU we must set aside on subsequent updates
				if cHandles.nvml != nil {
					uuid := C.CString(gpuInfo.ID)
					defer C.free(unsafe.Pointer(uuid))
					C.nvml_get_free(*cHandles.nvml, uuid, &memInfo.free, &memInfo.total, &memInfo.used)
					if memInfo.err != nil {
						slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
						C.free(unsafe.Pointer(memInfo.err))
					} else {
						if memInfo.free != 0 && uint64(memInfo.free) > gpuInfo.FreeMemory {
							gpuInfo.OSOverhead = uint64(memInfo.free) - gpuInfo.FreeMemory
							slog.Info("detected OS VRAM overhead",
								"id", gpuInfo.ID,
								"library", gpuInfo.Library,
								"compute", gpuInfo.Compute,
								"driver", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor),
								"name", gpuInfo.Name,
								"overhead", format.HumanBytes2(gpuInfo.OSOverhead),
							)
						}
					}
				}

				// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
				cudaGPUs = append(cudaGPUs, gpuInfo)
			}
		}

		// Intel
		if envconfig.IntelGPU() {
			oHandles = initOneAPIHandles()
			if oHandles != nil && oHandles.oneapi != nil {
				for d := range oHandles.oneapi.num_drivers {
					if oHandles.oneapi == nil {
						// shouldn't happen
						// NOTE(review): dead branch — the enclosing if already
						// guarantees oneapi != nil, and the Warn below would
						// dereference the nil handle if it were ever reached.
						slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
						continue
					}
					devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
					for i := range devCount {
						gpuInfo := OneapiGPUInfo{
							GpuInfo: GpuInfo{
								Library: "oneapi",
							},
							driverIndex: int(d),
							gpuIndex:    int(i),
						}
						// TODO - split bootstrapping from updating free memory
						C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
						// TODO - convert this to MinimumMemory based on testing...
						var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
						memInfo.free = C.uint64_t(totalFreeMem)
						gpuInfo.TotalMemory = uint64(memInfo.total)
						gpuInfo.FreeMemory = uint64(memInfo.free)
						gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
						gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
						gpuInfo.DependencyPath = []string{LibOllamaPath}
						oneapiGPUs = append(oneapiGPUs, gpuInfo)
					}
				}
			}
		}

		// AMD
		rocmGPUs, err = AMDGetGPUInfo()
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
		bootstrapped = true
		if len(cudaGPUs) == 0 && len(rocmGPUs) == 0 && len(oneapiGPUs) == 0 {
			slog.Info("no compatible GPUs were discovered")
		}

		// TODO verify we have runners for the discovered GPUs, filter out any that aren't supported with good error messages
	}

	// For detected GPUs, load library if not loaded

	// ---- Refresh pass: update free memory for previously discovered devices ----
	if needRefresh {
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		} else {
			slog.Debug("updating system memory data",
				slog.Group(
					"before",
					"total", format.HumanBytes2(cpus[0].TotalMemory),
					"free", format.HumanBytes2(cpus[0].FreeMemory),
					"free_swap", format.HumanBytes2(cpus[0].FreeSwap),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(mem.TotalMemory),
					"free", format.HumanBytes2(mem.FreeMemory),
					"free_swap", format.HumanBytes2(mem.FreeSwap),
				),
			)
			cpus[0].FreeMemory = mem.FreeMemory
			cpus[0].FreeSwap = mem.FreeSwap
		}

		var memInfo C.mem_info_t
		if cHandles == nil && len(cudaGPUs) > 0 {
			cHandles = initCudaHandles()
		}
		for i, gpu := range cudaGPUs {
			// Prefer NVML for free-memory queries, then cudart, then nvcuda.
			if cHandles.nvml != nil {
				uuid := C.CString(gpu.ID)
				defer C.free(unsafe.Pointer(uuid))
				C.nvml_get_free(*cHandles.nvml, uuid, &memInfo.free, &memInfo.total, &memInfo.used)
			} else if cHandles.cudart != nil {
				C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
			} else if cHandles.nvcuda != nil {
				C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
				memInfo.used = memInfo.total - memInfo.free
			} else {
				// shouldn't happen
				slog.Warn("no valid cuda library loaded to refresh vram usage")
				break
			}
			if memInfo.err != nil {
				slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
				C.free(unsafe.Pointer(memInfo.err))
				continue
			}
			if memInfo.free == 0 {
				slog.Warn("error looking up nvidia GPU memory")
				continue
			}
			if cHandles.nvml != nil && gpu.OSOverhead > 0 {
				// When using the management library update based on recorded overhead
				memInfo.free -= C.uint64_t(gpu.OSOverhead)
			}
			slog.Debug("updating cuda memory data",
				"gpu", gpu.ID,
				"name", gpu.Name,
				"overhead", format.HumanBytes2(gpu.OSOverhead),
				slog.Group(
					"before",
					"total", format.HumanBytes2(gpu.TotalMemory),
					"free", format.HumanBytes2(gpu.FreeMemory),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(uint64(memInfo.total)),
					"free", format.HumanBytes2(uint64(memInfo.free)),
					"used", format.HumanBytes2(uint64(memInfo.used)),
				),
			)
			cudaGPUs[i].FreeMemory = uint64(memInfo.free)
		}

		if oHandles == nil && len(oneapiGPUs) > 0 {
			oHandles = initOneAPIHandles()
		}
		for i, gpu := range oneapiGPUs {
			if oHandles.oneapi == nil {
				// shouldn't happen
				slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
				continue
			}
			C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
			// TODO - convert this to MinimumMemory based on testing...
			var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
			memInfo.free = C.uint64_t(totalFreeMem)
			oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
		}

		err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
		if err != nil {
			slog.Debug("problem refreshing ROCm free memory", "error", err)
		}
	}

	// Flatten all discovered GPUs into the response; fall back to the CPU
	// entry when none were found.
	resp := []GpuInfo{}
	for _, gpu := range cudaGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range rocmGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range oneapiGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	if len(resp) == 0 {
		resp = append(resp, cpus[0].GpuInfo)
	}
	return resp
}
  452. func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
  453. // Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
  454. gpuLibPaths := []string{}
  455. slog.Debug("Searching for GPU library", "name", baseLibName)
  456. // search our bundled libraries first
  457. patterns := []string{filepath.Join(LibOllamaPath, baseLibName)}
  458. var ldPaths []string
  459. switch runtime.GOOS {
  460. case "windows":
  461. ldPaths = strings.Split(os.Getenv("PATH"), string(os.PathListSeparator))
  462. case "linux":
  463. ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), string(os.PathListSeparator))
  464. }
  465. // then search the system's LD_LIBRARY_PATH
  466. for _, p := range ldPaths {
  467. p, err := filepath.Abs(p)
  468. if err != nil {
  469. continue
  470. }
  471. patterns = append(patterns, filepath.Join(p, baseLibName))
  472. }
  473. // finally, search the default patterns provided by the caller
  474. patterns = append(patterns, defaultPatterns...)
  475. slog.Debug("gpu library search", "globs", patterns)
  476. for _, pattern := range patterns {
  477. // Nvidia PhysX known to return bogus results
  478. if strings.Contains(pattern, "PhysX") {
  479. slog.Debug("skipping PhysX cuda library path", "path", pattern)
  480. continue
  481. }
  482. // Ignore glob discovery errors
  483. matches, _ := filepath.Glob(pattern)
  484. for _, match := range matches {
  485. // Resolve any links so we don't try the same lib multiple times
  486. // and weed out any dups across globs
  487. libPath := match
  488. tmp := match
  489. var err error
  490. for ; err == nil; tmp, err = os.Readlink(libPath) {
  491. if !filepath.IsAbs(tmp) {
  492. tmp = filepath.Join(filepath.Dir(libPath), tmp)
  493. }
  494. libPath = tmp
  495. }
  496. new := true
  497. for _, cmp := range gpuLibPaths {
  498. if cmp == libPath {
  499. new = false
  500. break
  501. }
  502. }
  503. if new {
  504. gpuLibPaths = append(gpuLibPaths, libPath)
  505. }
  506. }
  507. }
  508. slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
  509. return gpuLibPaths
  510. }
  511. // Bootstrap the runtime library
  512. // Returns: num devices, handle, libPath, error
  513. func loadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string, error) {
  514. var resp C.cudart_init_resp_t
  515. resp.ch.verbose = getVerboseState()
  516. var err error
  517. for _, libPath := range cudartLibPaths {
  518. lib := C.CString(libPath)
  519. defer C.free(unsafe.Pointer(lib))
  520. C.cudart_init(lib, &resp)
  521. if resp.err != nil {
  522. err = fmt.Errorf("Unable to load cudart library %s: %s", libPath, C.GoString(resp.err))
  523. slog.Debug(err.Error())
  524. C.free(unsafe.Pointer(resp.err))
  525. } else {
  526. err = nil
  527. return int(resp.num_devices), &resp.ch, libPath, err
  528. }
  529. }
  530. return 0, nil, "", err
  531. }
  532. // Bootstrap the driver library
  533. // Returns: num devices, handle, libPath, error
  534. func loadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string, error) {
  535. var resp C.nvcuda_init_resp_t
  536. resp.ch.verbose = getVerboseState()
  537. var err error
  538. for _, libPath := range nvcudaLibPaths {
  539. lib := C.CString(libPath)
  540. defer C.free(unsafe.Pointer(lib))
  541. C.nvcuda_init(lib, &resp)
  542. if resp.err != nil {
  543. // Decide what log level based on the type of error message to help users understand why
  544. switch resp.cudaErr {
  545. case C.CUDA_ERROR_INSUFFICIENT_DRIVER, C.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH:
  546. err = fmt.Errorf("version mismatch between driver and cuda driver library - reboot or upgrade may be required: library %s", libPath)
  547. slog.Warn(err.Error())
  548. case C.CUDA_ERROR_NO_DEVICE:
  549. err = fmt.Errorf("no nvidia devices detected by library %s", libPath)
  550. slog.Info(err.Error())
  551. case C.CUDA_ERROR_UNKNOWN:
  552. err = fmt.Errorf("unknown error initializing cuda driver library %s: %s. see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information", libPath, C.GoString(resp.err))
  553. slog.Warn(err.Error())
  554. default:
  555. msg := C.GoString(resp.err)
  556. if strings.Contains(msg, "wrong ELF class") {
  557. slog.Debug("skipping 32bit library", "library", libPath)
  558. } else {
  559. err = fmt.Errorf("Unable to load cudart library %s: %s", libPath, C.GoString(resp.err))
  560. slog.Info(err.Error())
  561. }
  562. }
  563. C.free(unsafe.Pointer(resp.err))
  564. } else {
  565. err = nil
  566. return int(resp.num_devices), &resp.ch, libPath, err
  567. }
  568. }
  569. return 0, nil, "", err
  570. }
  571. // Bootstrap the management library
  572. // Returns: handle, libPath, error
  573. func loadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string, error) {
  574. var resp C.nvml_init_resp_t
  575. resp.ch.verbose = getVerboseState()
  576. var err error
  577. for _, libPath := range nvmlLibPaths {
  578. lib := C.CString(libPath)
  579. defer C.free(unsafe.Pointer(lib))
  580. C.nvml_init(lib, &resp)
  581. if resp.err != nil {
  582. err = fmt.Errorf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err))
  583. slog.Info(err.Error())
  584. C.free(unsafe.Pointer(resp.err))
  585. } else {
  586. err = nil
  587. return &resp.ch, libPath, err
  588. }
  589. }
  590. return nil, "", err
  591. }
  592. // bootstrap the Intel GPU library
  593. // Returns: num devices, handle, libPath, error
  594. func loadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string, error) {
  595. var resp C.oneapi_init_resp_t
  596. num_devices := 0
  597. resp.oh.verbose = getVerboseState()
  598. var err error
  599. for _, libPath := range oneapiLibPaths {
  600. lib := C.CString(libPath)
  601. defer C.free(unsafe.Pointer(lib))
  602. C.oneapi_init(lib, &resp)
  603. if resp.err != nil {
  604. err = fmt.Errorf("Unable to load oneAPI management library %s: %s", libPath, C.GoString(resp.err))
  605. slog.Debug(err.Error())
  606. C.free(unsafe.Pointer(resp.err))
  607. } else {
  608. err = nil
  609. for i := range resp.oh.num_drivers {
  610. num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
  611. }
  612. return num_devices, &resp.oh, libPath, err
  613. }
  614. }
  615. return 0, nil, "", err
  616. }
  617. func getVerboseState() C.uint16_t {
  618. if envconfig.Debug() {
  619. return C.uint16_t(1)
  620. }
  621. return C.uint16_t(0)
  622. }
  623. // Given the list of GPUs this instantiation is targeted for,
  624. // figure out the visible devices environment variable
  625. //
  626. // If different libraries are detected, the first one is what we use
  627. func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
  628. if len(l) == 0 {
  629. return "", ""
  630. }
  631. switch l[0].Library {
  632. case "cuda":
  633. return cudaGetVisibleDevicesEnv(l)
  634. case "rocm":
  635. return rocmGetVisibleDevicesEnv(l)
  636. case "oneapi":
  637. return oneapiGetVisibleDevicesEnv(l)
  638. default:
  639. slog.Debug("no filter required for library " + l[0].Library)
  640. return "", ""
  641. }
  642. }
  643. func GetSystemInfo() SystemInfo {
  644. gpus := GetGPUInfo()
  645. gpuMutex.Lock()
  646. defer gpuMutex.Unlock()
  647. discoveryErrors := []string{}
  648. for _, err := range bootstrapErrors {
  649. discoveryErrors = append(discoveryErrors, err.Error())
  650. }
  651. if len(gpus) == 1 && gpus[0].Library == "cpu" {
  652. gpus = []GpuInfo{}
  653. }
  654. return SystemInfo{
  655. System: cpus[0],
  656. GPUs: gpus,
  657. UnsupportedGPUs: unsupportedGPUs,
  658. DiscoveryErrors: discoveryErrors,
  659. }
  660. }