// gpu.go — GPU discovery for the discover package.
  1. //go:build linux || windows
  2. package discover
  3. /*
  4. #cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
  5. #cgo windows LDFLAGS: -lpthread
  6. #include "gpu_info.h"
  7. */
  8. import "C"
  9. import (
  10. "fmt"
  11. "log/slog"
  12. "os"
  13. "path/filepath"
  14. "runtime"
  15. "strconv"
  16. "strings"
  17. "sync"
  18. "unsafe"
  19. "github.com/ollama/ollama/envconfig"
  20. "github.com/ollama/ollama/format"
  21. )
// cudaHandles aggregates the NVIDIA library handles that may be loaded
// during discovery: cudart (runtime), nvcuda (driver), and nvml
// (management). Any pointer may be nil if that library was not found or
// failed to load; deviceCount reflects the count reported by whichever
// enumeration library succeeded.
type cudaHandles struct {
	deviceCount int
	cudart      *C.cudart_handle_t
	nvcuda      *C.nvcuda_handle_t
	nvml        *C.nvml_handle_t
}
// oneapiHandles holds the Intel oneAPI management library handle and the
// total device count discovered through it. oneapi is nil if the library
// was not found or failed to load.
type oneapiHandles struct {
	oneapi      *C.oneapi_handle_t
	deviceCount int
}
const (
	// Memory set aside per GPU (assigned to GpuInfo.MinimumMemory during
	// discovery) before scheduling model layers onto it.
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
	// TODO OneAPI minimum memory
)
// Package-level discovery state, guarded by gpuMutex. Populated once by
// the bootstrap pass in GetGPUInfo and refreshed on later calls.
var (
	gpuMutex      sync.Mutex
	bootstrapped  bool
	cpus          []CPUInfo
	cudaGPUs      []CudaGPUInfo
	nvcudaLibPath string // cached library paths so later calls skip the search
	cudartLibPath string
	oneapiLibPath string
	nvmlLibPath   string
	rocmGPUs      []RocmGPUInfo
	oneapiGPUs    []OneapiGPUInfo
	// If any discovered GPUs are incompatible, report why
	unsupportedGPUs []UnsupportedGPUInfo
	// Keep track of errors during bootstrapping so that if GPUs that were
	// expected to be present are missing, this may explain why
	bootstrapErrors []error
)
// With our current CUDA compile flags, older than 5.0 will not work properly
// (string values used to allow ldflags overrides at build time)
var (
	CudaComputeMajorMin = "5"
	CudaComputeMinorMin = "0"
)

// Minimum supported AMD GFX major version (string for ldflags overrides).
var RocmComputeMajorMin = "9"

// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU
// initCudaHandles loads the NVIDIA discovery libraries, preferring the
// cached paths from a prior call, then NVML (management, loaded in
// addition to the others), then the driver library (nvcuda), and finally
// the runtime library (cudart). Failures are appended to bootstrapErrors.
// Note: gpuMutex must already be held
func initCudaHandles() *cudaHandles {
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing

	cHandles := &cudaHandles{}
	// Short Circuit if we already know which library to use
	// ignore bootstrap errors in this case since we already recorded them
	if nvmlLibPath != "" {
		cHandles.nvml, _, _ = loadNVMLMgmt([]string{nvmlLibPath})
		return cHandles
	}
	if nvcudaLibPath != "" {
		cHandles.deviceCount, cHandles.nvcuda, _, _ = loadNVCUDAMgmt([]string{nvcudaLibPath})
		return cHandles
	}
	if cudartLibPath != "" {
		cHandles.deviceCount, cHandles.cudart, _, _ = loadCUDARTMgmt([]string{cudartLibPath})
		return cHandles
	}

	slog.Debug("searching for GPU discovery libraries for NVIDIA")
	var cudartMgmtPatterns []string
	// Aligned with driver, we can't carry as payloads
	nvcudaMgmtPatterns := NvcudaGlobs
	// Our bundled cudart builds are searched before the system globs.
	cudartMgmtPatterns = append(cudartMgmtPatterns, filepath.Join(LibOllamaPath, "cuda_v*", CudartMgmtName))
	cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)

	// NVML is a management library only: note there is no early return on
	// success here — discovery still proceeds to nvcuda/cudart below.
	if len(NvmlGlobs) > 0 {
		nvmlLibPaths := FindGPULibs(NvmlMgmtName, NvmlGlobs)
		if len(nvmlLibPaths) > 0 {
			nvml, libPath, err := loadNVMLMgmt(nvmlLibPaths)
			if nvml != nil {
				slog.Debug("nvidia-ml loaded", "library", libPath)
				cHandles.nvml = nvml
				nvmlLibPath = libPath
			}
			if err != nil {
				bootstrapErrors = append(bootstrapErrors, err)
			}
		}
	}

	// Prefer the driver library; fall through to cudart only if it fails.
	nvcudaLibPaths := FindGPULibs(NvcudaMgmtName, nvcudaMgmtPatterns)
	if len(nvcudaLibPaths) > 0 {
		deviceCount, nvcuda, libPath, err := loadNVCUDAMgmt(nvcudaLibPaths)
		if nvcuda != nil {
			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
			cHandles.nvcuda = nvcuda
			cHandles.deviceCount = deviceCount
			nvcudaLibPath = libPath
			return cHandles
		}
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
	}

	// Runtime-library fallback.
	cudartLibPaths := FindGPULibs(CudartMgmtName, cudartMgmtPatterns)
	if len(cudartLibPaths) > 0 {
		deviceCount, cudart, libPath, err := loadCUDARTMgmt(cudartLibPaths)
		if cudart != nil {
			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
			cHandles.cudart = cudart
			cHandles.deviceCount = deviceCount
			cudartLibPath = libPath
			return cHandles
		}
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
	}
	return cHandles
}
  131. // Note: gpuMutex must already be held
  132. func initOneAPIHandles() *oneapiHandles {
  133. oHandles := &oneapiHandles{}
  134. // Short Circuit if we already know which library to use
  135. // ignore bootstrap errors in this case since we already recorded them
  136. if oneapiLibPath != "" {
  137. oHandles.deviceCount, oHandles.oneapi, _, _ = loadOneapiMgmt([]string{oneapiLibPath})
  138. return oHandles
  139. }
  140. oneapiLibPaths := FindGPULibs(OneapiMgmtName, OneapiGlobs)
  141. if len(oneapiLibPaths) > 0 {
  142. var err error
  143. oHandles.deviceCount, oHandles.oneapi, oneapiLibPath, err = loadOneapiMgmt(oneapiLibPaths)
  144. if err != nil {
  145. bootstrapErrors = append(bootstrapErrors, err)
  146. }
  147. }
  148. return oHandles
  149. }
  150. func GetCPUInfo() GpuInfoList {
  151. gpuMutex.Lock()
  152. if !bootstrapped {
  153. gpuMutex.Unlock()
  154. GetGPUInfo()
  155. } else {
  156. gpuMutex.Unlock()
  157. }
  158. return GpuInfoList{cpus[0].GpuInfo}
  159. }
// GetGPUInfo returns one GpuInfo per discovered GPU, or a single "cpu"
// entry when none are usable. The first call performs the full bootstrap
// (library loading, device enumeration, compatibility filtering); later
// calls only refresh free-memory figures. Safe for concurrent use via
// gpuMutex.
func GetGPUInfo() GpuInfoList {
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
	needRefresh := true
	var cHandles *cudaHandles
	var oHandles *oneapiHandles
	// Release any native library handles opened during this call.
	defer func() {
		if cHandles != nil {
			if cHandles.cudart != nil {
				C.cudart_release(*cHandles.cudart)
			}
			if cHandles.nvcuda != nil {
				C.nvcuda_release(*cHandles.nvcuda)
			}
			if cHandles.nvml != nil {
				C.nvml_release(*cHandles.nvml)
			}
		}
		if oHandles != nil {
			if oHandles.oneapi != nil {
				// TODO - is this needed?
				C.oneapi_release(*oHandles.oneapi)
			}
		}
	}()

	if !bootstrapped {
		slog.Info("looking for compatible GPUs")
		// Parse the minimum supported compute capability (string vars so
		// they can be overridden via ldflags at build time).
		cudaComputeMajorMin, err := strconv.Atoi(CudaComputeMajorMin)
		if err != nil {
			slog.Error("invalid CudaComputeMajorMin setting", "value", CudaComputeMajorMin, "error", err)
		}
		cudaComputeMinorMin, err := strconv.Atoi(CudaComputeMinorMin)
		if err != nil {
			slog.Error("invalid CudaComputeMinorMin setting", "value", CudaComputeMinorMin, "error", err)
		}
		bootstrapErrors = []error{}
		needRefresh = false
		var memInfo C.mem_info_t
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		}
		details, err := GetCPUDetails()
		if err != nil {
			slog.Warn("failed to lookup CPU details", "error", err)
		}
		// The CPU is always recorded as entry 0 and doubles as the
		// fallback "GPU" when no real devices are found.
		cpus = []CPUInfo{
			{
				GpuInfo: GpuInfo{
					memInfo: mem,
					Library: "cpu",
					ID:      "0",
				},
				CPUs: details,
			},
		}

		// Load ALL libraries
		cHandles = initCudaHandles()

		// NVIDIA
		for i := range cHandles.deviceCount {
			if cHandles.cudart != nil || cHandles.nvcuda != nil {
				gpuInfo := CudaGPUInfo{
					GpuInfo: GpuInfo{
						Library: "cuda",
					},
					index: i,
				}
				var driverMajor int
				var driverMinor int
				// Prefer cudart for per-device memory info; only nvcuda
				// exposes the driver version.
				if cHandles.cudart != nil {
					C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
				} else {
					C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
					driverMajor = int(cHandles.nvcuda.driver_major)
					driverMinor = int(cHandles.nvcuda.driver_minor)
				}
				if memInfo.err != nil {
					slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
					C.free(unsafe.Pointer(memInfo.err))
					continue
				}
				gpuInfo.TotalMemory = uint64(memInfo.total)
				gpuInfo.FreeMemory = uint64(memInfo.free)
				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
				gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
				gpuInfo.computeMajor = int(memInfo.major)
				gpuInfo.computeMinor = int(memInfo.minor)
				gpuInfo.MinimumMemory = cudaMinimumMemory
				gpuInfo.DriverMajor = driverMajor
				gpuInfo.DriverMinor = driverMinor
				variant := cudaVariant(gpuInfo)

				// Start with our bundled libraries
				if variant != "" {
					variantPath := filepath.Join(LibOllamaPath, "cuda_"+variant)
					if _, err := os.Stat(variantPath); err == nil {
						// Put the variant directory first in the search path to avoid runtime linking to the wrong library
						gpuInfo.DependencyPath = append([]string{variantPath}, gpuInfo.DependencyPath...)
					}
				}
				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
				gpuInfo.Variant = variant

				// Skip (but record) GPUs below the minimum compute capability.
				if int(memInfo.major) < cudaComputeMajorMin || (int(memInfo.major) == cudaComputeMajorMin && int(memInfo.minor) < cudaComputeMinorMin) {
					unsupportedGPUs = append(unsupportedGPUs,
						UnsupportedGPUInfo{
							GpuInfo: gpuInfo.GpuInfo,
						})
					slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
					continue
				}

				// query the management library as well so we can record any skew between the two
				// which represents overhead on the GPU we must set aside on subsequent updates
				if cHandles.nvml != nil {
					uuid := C.CString(gpuInfo.ID)
					// NOTE(review): defer inside the loop frees all uuid
					// CStrings only at function return; bounded by device count.
					defer C.free(unsafe.Pointer(uuid))
					C.nvml_get_free(*cHandles.nvml, uuid, &memInfo.free, &memInfo.total, &memInfo.used)
					if memInfo.err != nil {
						slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
						C.free(unsafe.Pointer(memInfo.err))
					} else {
						if memInfo.free != 0 && uint64(memInfo.free) > gpuInfo.FreeMemory {
							gpuInfo.OSOverhead = uint64(memInfo.free) - gpuInfo.FreeMemory
							slog.Info("detected OS VRAM overhead",
								"id", gpuInfo.ID,
								"library", gpuInfo.Library,
								"compute", gpuInfo.Compute,
								"driver", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor),
								"name", gpuInfo.Name,
								"overhead", format.HumanBytes2(gpuInfo.OSOverhead),
							)
						}
					}
				}

				// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
				cudaGPUs = append(cudaGPUs, gpuInfo)
			}
		}

		// Intel
		if envconfig.IntelGPU() {
			oHandles = initOneAPIHandles()
			if oHandles != nil && oHandles.oneapi != nil {
				for d := range oHandles.oneapi.num_drivers {
					if oHandles.oneapi == nil {
						// shouldn't happen
						slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
						continue
					}
					devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
					for i := range devCount {
						gpuInfo := OneapiGPUInfo{
							GpuInfo: GpuInfo{
								Library: "oneapi",
							},
							driverIndex: int(d),
							gpuIndex:    int(i),
						}
						// TODO - split bootstrapping from updating free memory
						C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
						// TODO - convert this to MinimumMemory based on testing...
						var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
						memInfo.free = C.uint64_t(totalFreeMem)
						gpuInfo.TotalMemory = uint64(memInfo.total)
						gpuInfo.FreeMemory = uint64(memInfo.free)
						gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
						gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
						gpuInfo.DependencyPath = []string{LibOllamaPath}
						oneapiGPUs = append(oneapiGPUs, gpuInfo)
					}
				}
			}
		}

		// AMD
		rocmGPUs, err = AMDGetGPUInfo()
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
		bootstrapped = true
		if len(cudaGPUs) == 0 && len(rocmGPUs) == 0 && len(oneapiGPUs) == 0 {
			slog.Info("no compatible GPUs were discovered")
		}

		// TODO verify we have runners for the discovered GPUs, filter out any that aren't supported with good error messages
	}

	// For detected GPUs, load library if not loaded
	// Refresh free memory usage
	if needRefresh {
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		} else {
			slog.Debug("updating system memory data",
				slog.Group(
					"before",
					"total", format.HumanBytes2(cpus[0].TotalMemory),
					"free", format.HumanBytes2(cpus[0].FreeMemory),
					"free_swap", format.HumanBytes2(cpus[0].FreeSwap),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(mem.TotalMemory),
					"free", format.HumanBytes2(mem.FreeMemory),
					"free_swap", format.HumanBytes2(mem.FreeSwap),
				),
			)
			cpus[0].FreeMemory = mem.FreeMemory
			cpus[0].FreeSwap = mem.FreeSwap
		}

		var memInfo C.mem_info_t
		// Re-open the libraries only if GPUs of that kind were discovered.
		if cHandles == nil && len(cudaGPUs) > 0 {
			cHandles = initCudaHandles()
		}
		for i, gpu := range cudaGPUs {
			// Prefer NVML for free-memory queries, then cudart, then nvcuda.
			if cHandles.nvml != nil {
				uuid := C.CString(gpu.ID)
				defer C.free(unsafe.Pointer(uuid))
				C.nvml_get_free(*cHandles.nvml, uuid, &memInfo.free, &memInfo.total, &memInfo.used)
			} else if cHandles.cudart != nil {
				C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
			} else if cHandles.nvcuda != nil {
				C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
				memInfo.used = memInfo.total - memInfo.free
			} else {
				// shouldn't happen
				slog.Warn("no valid cuda library loaded to refresh vram usage")
				break
			}
			if memInfo.err != nil {
				slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
				C.free(unsafe.Pointer(memInfo.err))
				continue
			}
			if memInfo.free == 0 {
				slog.Warn("error looking up nvidia GPU memory")
				continue
			}
			if cHandles.nvml != nil && gpu.OSOverhead > 0 {
				// When using the management library update based on recorded overhead
				memInfo.free -= C.uint64_t(gpu.OSOverhead)
			}
			slog.Debug("updating cuda memory data",
				"gpu", gpu.ID,
				"name", gpu.Name,
				"overhead", format.HumanBytes2(gpu.OSOverhead),
				slog.Group(
					"before",
					"total", format.HumanBytes2(gpu.TotalMemory),
					"free", format.HumanBytes2(gpu.FreeMemory),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(uint64(memInfo.total)),
					"free", format.HumanBytes2(uint64(memInfo.free)),
					"used", format.HumanBytes2(uint64(memInfo.used)),
				),
			)
			cudaGPUs[i].FreeMemory = uint64(memInfo.free)
		}

		if oHandles == nil && len(oneapiGPUs) > 0 {
			oHandles = initOneAPIHandles()
		}
		for i, gpu := range oneapiGPUs {
			if oHandles.oneapi == nil {
				// shouldn't happen
				slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
				continue
			}
			C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
			// TODO - convert this to MinimumMemory based on testing...
			var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
			memInfo.free = C.uint64_t(totalFreeMem)
			oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
		}

		err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
		if err != nil {
			slog.Debug("problem refreshing ROCm free memory", "error", err)
		}
	}

	// Flatten all discovered GPUs; fall back to the CPU entry if none.
	resp := []GpuInfo{}
	for _, gpu := range cudaGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range rocmGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range oneapiGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	if len(resp) == 0 {
		resp = append(resp, cpus[0].GpuInfo)
	}
	return resp
}
  451. func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
  452. // Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
  453. gpuLibPaths := []string{}
  454. slog.Debug("Searching for GPU library", "name", baseLibName)
  455. // search our bundled libraries first
  456. patterns := []string{filepath.Join(LibOllamaPath, baseLibName)}
  457. var ldPaths []string
  458. switch runtime.GOOS {
  459. case "windows":
  460. ldPaths = strings.Split(os.Getenv("PATH"), string(os.PathListSeparator))
  461. case "linux":
  462. ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), string(os.PathListSeparator))
  463. }
  464. // then search the system's LD_LIBRARY_PATH
  465. for _, p := range ldPaths {
  466. p, err := filepath.Abs(p)
  467. if err != nil {
  468. continue
  469. }
  470. patterns = append(patterns, filepath.Join(p, baseLibName))
  471. }
  472. // finally, search the default patterns provided by the caller
  473. patterns = append(patterns, defaultPatterns...)
  474. slog.Debug("gpu library search", "globs", patterns)
  475. for _, pattern := range patterns {
  476. // Nvidia PhysX known to return bogus results
  477. if strings.Contains(pattern, "PhysX") {
  478. slog.Debug("skipping PhysX cuda library path", "path", pattern)
  479. continue
  480. }
  481. // Ignore glob discovery errors
  482. matches, _ := filepath.Glob(pattern)
  483. for _, match := range matches {
  484. // Resolve any links so we don't try the same lib multiple times
  485. // and weed out any dups across globs
  486. libPath := match
  487. tmp := match
  488. var err error
  489. for ; err == nil; tmp, err = os.Readlink(libPath) {
  490. if !filepath.IsAbs(tmp) {
  491. tmp = filepath.Join(filepath.Dir(libPath), tmp)
  492. }
  493. libPath = tmp
  494. }
  495. new := true
  496. for _, cmp := range gpuLibPaths {
  497. if cmp == libPath {
  498. new = false
  499. break
  500. }
  501. }
  502. if new {
  503. gpuLibPaths = append(gpuLibPaths, libPath)
  504. }
  505. }
  506. }
  507. slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
  508. return gpuLibPaths
  509. }
// Bootstrap the runtime library
// Returns: num devices, handle, libPath, error
// Candidate paths are tried in order; the first successful init wins. On
// total failure the most recent error is returned.
func loadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string, error) {
	var resp C.cudart_init_resp_t
	resp.ch.verbose = getVerboseState()
	var err error
	for _, libPath := range cudartLibPaths {
		lib := C.CString(libPath)
		// NOTE(review): deferred frees accumulate until the function
		// returns (not per iteration); bounded by len(cudartLibPaths).
		defer C.free(unsafe.Pointer(lib))
		C.cudart_init(lib, &resp)
		if resp.err != nil {
			err = fmt.Errorf("Unable to load cudart library %s: %s", libPath, C.GoString(resp.err))
			slog.Debug(err.Error())
			C.free(unsafe.Pointer(resp.err))
		} else {
			err = nil
			return int(resp.num_devices), &resp.ch, libPath, err
		}
	}
	return 0, nil, "", err
}
  531. // Bootstrap the driver library
  532. // Returns: num devices, handle, libPath, error
  533. func loadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string, error) {
  534. var resp C.nvcuda_init_resp_t
  535. resp.ch.verbose = getVerboseState()
  536. var err error
  537. for _, libPath := range nvcudaLibPaths {
  538. lib := C.CString(libPath)
  539. defer C.free(unsafe.Pointer(lib))
  540. C.nvcuda_init(lib, &resp)
  541. if resp.err != nil {
  542. // Decide what log level based on the type of error message to help users understand why
  543. switch resp.cudaErr {
  544. case C.CUDA_ERROR_INSUFFICIENT_DRIVER, C.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH:
  545. err = fmt.Errorf("version mismatch between driver and cuda driver library - reboot or upgrade may be required: library %s", libPath)
  546. slog.Warn(err.Error())
  547. case C.CUDA_ERROR_NO_DEVICE:
  548. err = fmt.Errorf("no nvidia devices detected by library %s", libPath)
  549. slog.Info(err.Error())
  550. case C.CUDA_ERROR_UNKNOWN:
  551. err = fmt.Errorf("unknown error initializing cuda driver library %s: %s. see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information", libPath, C.GoString(resp.err))
  552. slog.Warn(err.Error())
  553. default:
  554. msg := C.GoString(resp.err)
  555. if strings.Contains(msg, "wrong ELF class") {
  556. slog.Debug("skipping 32bit library", "library", libPath)
  557. } else {
  558. err = fmt.Errorf("Unable to load cudart library %s: %s", libPath, C.GoString(resp.err))
  559. slog.Info(err.Error())
  560. }
  561. }
  562. C.free(unsafe.Pointer(resp.err))
  563. } else {
  564. err = nil
  565. return int(resp.num_devices), &resp.ch, libPath, err
  566. }
  567. }
  568. return 0, nil, "", err
  569. }
// Bootstrap the management library
// Returns: handle, libPath, error
// Candidate paths are tried in order; the first successful init wins. On
// total failure the most recent error is returned.
func loadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string, error) {
	var resp C.nvml_init_resp_t
	resp.ch.verbose = getVerboseState()
	var err error
	for _, libPath := range nvmlLibPaths {
		lib := C.CString(libPath)
		// Deferred frees accumulate until the function returns; bounded by
		// the candidate path count.
		defer C.free(unsafe.Pointer(lib))
		C.nvml_init(lib, &resp)
		if resp.err != nil {
			err = fmt.Errorf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err))
			slog.Info(err.Error())
			C.free(unsafe.Pointer(resp.err))
		} else {
			err = nil
			return &resp.ch, libPath, err
		}
	}
	return nil, "", err
}
// bootstrap the Intel GPU library
// Returns: num devices, handle, libPath, error
// Candidate paths are tried in order; the first successful init wins and
// the device count is summed across all oneAPI drivers.
func loadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string, error) {
	var resp C.oneapi_init_resp_t
	num_devices := 0
	resp.oh.verbose = getVerboseState()
	var err error
	for _, libPath := range oneapiLibPaths {
		lib := C.CString(libPath)
		// Deferred frees accumulate until the function returns; bounded by
		// the candidate path count.
		defer C.free(unsafe.Pointer(lib))
		C.oneapi_init(lib, &resp)
		if resp.err != nil {
			err = fmt.Errorf("Unable to load oneAPI management library %s: %s", libPath, C.GoString(resp.err))
			slog.Debug(err.Error())
			C.free(unsafe.Pointer(resp.err))
		} else {
			err = nil
			for i := range resp.oh.num_drivers {
				num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
			}
			return num_devices, &resp.oh, libPath, err
		}
	}
	return 0, nil, "", err
}
  616. func getVerboseState() C.uint16_t {
  617. if envconfig.Debug() {
  618. return C.uint16_t(1)
  619. }
  620. return C.uint16_t(0)
  621. }
  622. // Given the list of GPUs this instantiation is targeted for,
  623. // figure out the visible devices environment variable
  624. //
  625. // If different libraries are detected, the first one is what we use
  626. func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
  627. if len(l) == 0 {
  628. return "", ""
  629. }
  630. switch l[0].Library {
  631. case "cuda":
  632. return cudaGetVisibleDevicesEnv(l)
  633. case "rocm":
  634. return rocmGetVisibleDevicesEnv(l)
  635. case "oneapi":
  636. return oneapiGetVisibleDevicesEnv(l)
  637. default:
  638. slog.Debug("no filter required for library " + l[0].Library)
  639. return "", ""
  640. }
  641. }
  642. func GetSystemInfo() SystemInfo {
  643. gpus := GetGPUInfo()
  644. gpuMutex.Lock()
  645. defer gpuMutex.Unlock()
  646. discoveryErrors := []string{}
  647. for _, err := range bootstrapErrors {
  648. discoveryErrors = append(discoveryErrors, err.Error())
  649. }
  650. if len(gpus) == 1 && gpus[0].Library == "cpu" {
  651. gpus = []GpuInfo{}
  652. }
  653. return SystemInfo{
  654. System: cpus[0],
  655. GPUs: gpus,
  656. UnsupportedGPUs: unsupportedGPUs,
  657. DiscoveryErrors: discoveryErrors,
  658. }
  659. }