gpu.go 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754
  1. //go:build linux || windows
  2. package discover
  3. /*
  4. #cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
  5. #cgo windows LDFLAGS: -lpthread
  6. #include "gpu_info.h"
  7. */
  8. import "C"
  9. import (
  10. "fmt"
  11. "log/slog"
  12. "os"
  13. "path/filepath"
  14. "runtime"
  15. "strconv"
  16. "strings"
  17. "sync"
  18. "unsafe"
  19. "github.com/ollama/ollama/envconfig"
  20. "github.com/ollama/ollama/format"
  21. "github.com/ollama/ollama/runners"
  22. )
// cudaHandles aggregates the NVIDIA library handles discovered at
// bootstrap.  Only one of cudart/nvcuda is populated for device
// enumeration (initCudaHandles returns early after the first success);
// nvml, when present, supplements free-memory queries.  A nil field
// means that library was not loaded.
type cudaHandles struct {
	deviceCount int
	cudart      *C.cudart_handle_t
	nvcuda      *C.nvcuda_handle_t
	nvml        *C.nvml_handle_t
}
// oneapiHandles holds the Intel oneAPI management library handle and the
// total device count summed across all of its drivers.  A nil oneapi
// field means the library was not loaded.
type oneapiHandles struct {
	oneapi      *C.oneapi_handle_t
	deviceCount int
}
const (
	// Minimum VRAM headroom reserved per GPU before scheduling work onto it.
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
	// TODO OneAPI minimum memory
)
// Package-level discovery state.  All of these are guarded by gpuMutex.
var (
	gpuMutex     sync.Mutex
	bootstrapped bool // set once the one-time discovery pass completes
	cpus         []CPUInfo
	cudaGPUs     []CudaGPUInfo
	// Cached library paths so refresh passes skip re-discovery.
	nvcudaLibPath string
	cudartLibPath string
	oneapiLibPath string
	nvmlLibPath   string
	rocmGPUs      []RocmGPUInfo
	oneapiGPUs    []OneapiGPUInfo
	// If any discovered GPUs are incompatible, report why
	unsupportedGPUs []UnsupportedGPUInfo
	// Keep track of errors during bootstrapping so that if GPUs are missing
	// that were expected to be present this may explain why
	bootstrapErrors []error
)
// With our current CUDA compile flags, older than 5.0 will not work properly
// (string values used to allow ldflags overrides at build time)
var (
	CudaComputeMajorMin = "5"
	CudaComputeMinorMin = "0"
)
// RocmComputeMajorMin is the minimum AMD GFX major version supported
// (string value to allow ldflags overrides at build time).
var RocmComputeMajorMin = "9"

// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU
  64. // Note: gpuMutex must already be held
// Note: gpuMutex must already be held
//
// initCudaHandles locates and loads the NVIDIA libraries, preferring
// NVML, then the driver library (nvcuda), then the CUDA runtime
// (cudart).  The first successfully loaded path of each kind is cached
// in the package-level *LibPath variables so later calls can reload it
// directly without repeating the filesystem search.
func initCudaHandles() *cudaHandles {
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
	cHandles := &cudaHandles{}
	// Short Circuit if we already know which library to use
	// ignore bootstrap errors in this case since we already recorded them
	if nvmlLibPath != "" {
		cHandles.nvml, _, _ = loadNVMLMgmt([]string{nvmlLibPath})
		return cHandles
	}
	if nvcudaLibPath != "" {
		cHandles.deviceCount, cHandles.nvcuda, _, _ = loadNVCUDAMgmt([]string{nvcudaLibPath})
		return cHandles
	}
	if cudartLibPath != "" {
		cHandles.deviceCount, cHandles.cudart, _, _ = loadCUDARTMgmt([]string{cudartLibPath})
		return cHandles
	}
	slog.Debug("searching for GPU discovery libraries for NVIDIA")
	var cudartMgmtPatterns []string
	// Aligned with driver, we can't carry as payloads
	nvcudaMgmtPatterns := NvcudaGlobs
	if runtime.GOOS == "windows" {
		localAppData := os.Getenv("LOCALAPPDATA")
		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", CudartMgmtName)}
	}
	libDirs := LibraryDirs()
	for _, d := range libDirs {
		cudartMgmtPatterns = append(cudartMgmtPatterns, filepath.Join(d, CudartMgmtName))
	}
	cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)
	// NVML first: it does not return a device count, so discovery keeps
	// going to nvcuda/cudart below even on success.
	if len(NvmlGlobs) > 0 {
		nvmlLibPaths := FindGPULibs(NvmlMgmtName, NvmlGlobs)
		if len(nvmlLibPaths) > 0 {
			nvml, libPath, err := loadNVMLMgmt(nvmlLibPaths)
			if nvml != nil {
				slog.Debug("nvidia-ml loaded", "library", libPath)
				cHandles.nvml = nvml
				nvmlLibPath = libPath
			}
			if err != nil {
				bootstrapErrors = append(bootstrapErrors, err)
			}
		}
	}
	// Driver library: returns early on success since it can enumerate devices.
	nvcudaLibPaths := FindGPULibs(NvcudaMgmtName, nvcudaMgmtPatterns)
	if len(nvcudaLibPaths) > 0 {
		deviceCount, nvcuda, libPath, err := loadNVCUDAMgmt(nvcudaLibPaths)
		if nvcuda != nil {
			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
			cHandles.nvcuda = nvcuda
			cHandles.deviceCount = deviceCount
			nvcudaLibPath = libPath
			return cHandles
		}
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
	}
	// Last resort: the CUDA runtime library.
	cudartLibPaths := FindGPULibs(CudartMgmtName, cudartMgmtPatterns)
	if len(cudartLibPaths) > 0 {
		deviceCount, cudart, libPath, err := loadCUDARTMgmt(cudartLibPaths)
		if cudart != nil {
			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
			cHandles.cudart = cudart
			cHandles.deviceCount = deviceCount
			cudartLibPath = libPath
			return cHandles
		}
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
	}
	return cHandles
}
  139. // Note: gpuMutex must already be held
  140. func initOneAPIHandles() *oneapiHandles {
  141. oHandles := &oneapiHandles{}
  142. // Short Circuit if we already know which library to use
  143. // ignore bootstrap errors in this case since we already recorded them
  144. if oneapiLibPath != "" {
  145. oHandles.deviceCount, oHandles.oneapi, _, _ = loadOneapiMgmt([]string{oneapiLibPath})
  146. return oHandles
  147. }
  148. oneapiLibPaths := FindGPULibs(OneapiMgmtName, OneapiGlobs)
  149. if len(oneapiLibPaths) > 0 {
  150. var err error
  151. oHandles.deviceCount, oHandles.oneapi, oneapiLibPath, err = loadOneapiMgmt(oneapiLibPaths)
  152. if err != nil {
  153. bootstrapErrors = append(bootstrapErrors, err)
  154. }
  155. }
  156. return oHandles
  157. }
  158. func GetCPUInfo() GpuInfoList {
  159. gpuMutex.Lock()
  160. if !bootstrapped {
  161. gpuMutex.Unlock()
  162. GetGPUInfo()
  163. } else {
  164. gpuMutex.Unlock()
  165. }
  166. return GpuInfoList{cpus[0].GpuInfo}
  167. }
// GetGPUInfo discovers (first call) or refreshes (subsequent calls) the
// available GPUs and returns one GpuInfo per usable device, falling back
// to the CPU entry when none are found.  The first call bootstraps all
// vendor libraries (CUDA, oneAPI via envconfig flag, ROCm) and records
// any failures in bootstrapErrors; later calls only update free-memory
// figures.
func GetGPUInfo() GpuInfoList {
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
	needRefresh := true
	var cHandles *cudaHandles
	var oHandles *oneapiHandles
	// Release any C-side library handles opened during this call.
	defer func() {
		if cHandles != nil {
			if cHandles.cudart != nil {
				C.cudart_release(*cHandles.cudart)
			}
			if cHandles.nvcuda != nil {
				C.nvcuda_release(*cHandles.nvcuda)
			}
			if cHandles.nvml != nil {
				C.nvml_release(*cHandles.nvml)
			}
		}
		if oHandles != nil {
			if oHandles.oneapi != nil {
				// TODO - is this needed?
				C.oneapi_release(*oHandles.oneapi)
			}
		}
	}()
	if !bootstrapped {
		slog.Info("looking for compatible GPUs")
		// Compute-capability floors are strings to allow ldflags overrides;
		// parse failures are logged but leave the zero value in place.
		cudaComputeMajorMin, err := strconv.Atoi(CudaComputeMajorMin)
		if err != nil {
			slog.Error("invalid CudaComputeMajorMin setting", "value", CudaComputeMajorMin, "error", err)
		}
		cudaComputeMinorMin, err := strconv.Atoi(CudaComputeMinorMin)
		if err != nil {
			slog.Error("invalid CudaComputeMinorMin setting", "value", CudaComputeMinorMin, "error", err)
		}
		bootstrapErrors = []error{}
		needRefresh = false
		var memInfo C.mem_info_t
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		}
		depPaths := LibraryDirs()
		details, err := GetCPUDetails()
		if err != nil {
			slog.Warn("failed to lookup CPU details", "error", err)
		}
		// The CPU entry always exists at index 0 and doubles as the
		// fallback "GPU" when no accelerators are discovered.
		cpus = []CPUInfo{
			{
				GpuInfo: GpuInfo{
					memInfo:        mem,
					Library:        "cpu",
					Variant:        runners.GetCPUCapability().String(),
					ID:             "0",
					DependencyPath: depPaths,
				},
				CPUs: details,
			},
		}
		// Load ALL libraries
		cHandles = initCudaHandles()
		// NVIDIA
		for i := range cHandles.deviceCount {
			if cHandles.cudart != nil || cHandles.nvcuda != nil {
				gpuInfo := CudaGPUInfo{
					GpuInfo: GpuInfo{
						Library: "cuda",
					},
					index: i,
				}
				var driverMajor int
				var driverMinor int
				// Query per-device memory via whichever library enumerated
				// the devices (only nvcuda reports driver version).
				if cHandles.cudart != nil {
					C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
				} else {
					C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
					driverMajor = int(cHandles.nvcuda.driver_major)
					driverMinor = int(cHandles.nvcuda.driver_minor)
				}
				if memInfo.err != nil {
					slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
					C.free(unsafe.Pointer(memInfo.err))
					continue
				}
				gpuInfo.TotalMemory = uint64(memInfo.total)
				gpuInfo.FreeMemory = uint64(memInfo.free)
				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
				gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
				gpuInfo.computeMajor = int(memInfo.major)
				gpuInfo.computeMinor = int(memInfo.minor)
				gpuInfo.MinimumMemory = cudaMinimumMemory
				gpuInfo.DriverMajor = driverMajor
				gpuInfo.DriverMinor = driverMinor
				variant := cudaVariant(gpuInfo)
				if depPaths != nil {
					gpuInfo.DependencyPath = depPaths
					// Check for variant specific directory
					if variant != "" {
						for _, d := range depPaths {
							if _, err := os.Stat(filepath.Join(d, "cuda_"+variant)); err == nil {
								// Put the variant directory first in the search path to avoid runtime linking to the wrong library
								gpuInfo.DependencyPath = append([]string{filepath.Join(d, "cuda_"+variant)}, gpuInfo.DependencyPath...)
								break
							}
						}
					}
				}
				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
				gpuInfo.Variant = variant
				// Reject devices below the supported compute-capability floor.
				if int(memInfo.major) < cudaComputeMajorMin || (int(memInfo.major) == cudaComputeMajorMin && int(memInfo.minor) < cudaComputeMinorMin) {
					unsupportedGPUs = append(unsupportedGPUs,
						UnsupportedGPUInfo{
							GpuInfo: gpuInfo.GpuInfo,
						})
					slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
					continue
				}
				// query the management library as well so we can record any skew between the two
				// which represents overhead on the GPU we must set aside on subsequent updates
				if cHandles.nvml != nil {
					uuid := C.CString(gpuInfo.ID)
					defer C.free(unsafe.Pointer(uuid))
					C.nvml_get_free(*cHandles.nvml, uuid, &memInfo.free, &memInfo.total, &memInfo.used)
					if memInfo.err != nil {
						slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
						C.free(unsafe.Pointer(memInfo.err))
					} else {
						if memInfo.free != 0 && uint64(memInfo.free) > gpuInfo.FreeMemory {
							gpuInfo.OSOverhead = uint64(memInfo.free) - gpuInfo.FreeMemory
							slog.Info("detected OS VRAM overhead",
								"id", gpuInfo.ID,
								"library", gpuInfo.Library,
								"compute", gpuInfo.Compute,
								"driver", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor),
								"name", gpuInfo.Name,
								"overhead", format.HumanBytes2(gpuInfo.OSOverhead),
							)
						}
					}
				}
				// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
				cudaGPUs = append(cudaGPUs, gpuInfo)
			}
		}
		// Intel
		if envconfig.IntelGPU() {
			oHandles = initOneAPIHandles()
			if oHandles != nil && oHandles.oneapi != nil {
				for d := range oHandles.oneapi.num_drivers {
					if oHandles.oneapi == nil {
						// shouldn't happen
						slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
						continue
					}
					devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
					for i := range devCount {
						gpuInfo := OneapiGPUInfo{
							GpuInfo: GpuInfo{
								Library: "oneapi",
							},
							driverIndex: int(d),
							gpuIndex:    int(i),
						}
						// TODO - split bootstrapping from updating free memory
						C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
						// TODO - convert this to MinimumMemory based on testing...
						var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
						memInfo.free = C.uint64_t(totalFreeMem)
						gpuInfo.TotalMemory = uint64(memInfo.total)
						gpuInfo.FreeMemory = uint64(memInfo.free)
						gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
						gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
						gpuInfo.DependencyPath = depPaths
						oneapiGPUs = append(oneapiGPUs, gpuInfo)
					}
				}
			}
		}
		rocmGPUs, err = AMDGetGPUInfo()
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
		bootstrapped = true
		if len(cudaGPUs) == 0 && len(rocmGPUs) == 0 && len(oneapiGPUs) == 0 {
			slog.Info("no compatible GPUs were discovered")
		}
		// TODO verify we have runners for the discovered GPUs, filter out any that aren't supported with good error messages
	}
	// For detected GPUs, load library if not loaded
	// Refresh free memory usage
	if needRefresh {
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		} else {
			slog.Debug("updating system memory data",
				slog.Group(
					"before",
					"total", format.HumanBytes2(cpus[0].TotalMemory),
					"free", format.HumanBytes2(cpus[0].FreeMemory),
					"free_swap", format.HumanBytes2(cpus[0].FreeSwap),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(mem.TotalMemory),
					"free", format.HumanBytes2(mem.FreeMemory),
					"free_swap", format.HumanBytes2(mem.FreeSwap),
				),
			)
			cpus[0].FreeMemory = mem.FreeMemory
			cpus[0].FreeSwap = mem.FreeSwap
		}
		var memInfo C.mem_info_t
		if cHandles == nil && len(cudaGPUs) > 0 {
			cHandles = initCudaHandles()
		}
		for i, gpu := range cudaGPUs {
			// Prefer NVML for free-memory refresh, then cudart, then nvcuda.
			if cHandles.nvml != nil {
				uuid := C.CString(gpu.ID)
				defer C.free(unsafe.Pointer(uuid))
				C.nvml_get_free(*cHandles.nvml, uuid, &memInfo.free, &memInfo.total, &memInfo.used)
			} else if cHandles.cudart != nil {
				C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
			} else if cHandles.nvcuda != nil {
				C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
				memInfo.used = memInfo.total - memInfo.free
			} else {
				// shouldn't happen
				slog.Warn("no valid cuda library loaded to refresh vram usage")
				break
			}
			if memInfo.err != nil {
				slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
				C.free(unsafe.Pointer(memInfo.err))
				continue
			}
			if memInfo.free == 0 {
				slog.Warn("error looking up nvidia GPU memory")
				continue
			}
			if cHandles.nvml != nil && gpu.OSOverhead > 0 {
				// When using the management library update based on recorded overhead
				memInfo.free -= C.uint64_t(gpu.OSOverhead)
			}
			slog.Debug("updating cuda memory data",
				"gpu", gpu.ID,
				"name", gpu.Name,
				"overhead", format.HumanBytes2(gpu.OSOverhead),
				slog.Group(
					"before",
					"total", format.HumanBytes2(gpu.TotalMemory),
					"free", format.HumanBytes2(gpu.FreeMemory),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(uint64(memInfo.total)),
					"free", format.HumanBytes2(uint64(memInfo.free)),
					"used", format.HumanBytes2(uint64(memInfo.used)),
				),
			)
			cudaGPUs[i].FreeMemory = uint64(memInfo.free)
		}
		if oHandles == nil && len(oneapiGPUs) > 0 {
			oHandles = initOneAPIHandles()
		}
		for i, gpu := range oneapiGPUs {
			if oHandles.oneapi == nil {
				// shouldn't happen
				slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
				continue
			}
			C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
			// TODO - convert this to MinimumMemory based on testing...
			var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
			memInfo.free = C.uint64_t(totalFreeMem)
			oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
		}
		err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
		if err != nil {
			slog.Debug("problem refreshing ROCm free memory", "error", err)
		}
	}
	// Flatten all vendor lists into the response; fall back to the CPU
	// entry when nothing was discovered.
	resp := []GpuInfo{}
	for _, gpu := range cudaGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range rocmGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range oneapiGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	if len(resp) == 0 {
		resp = append(resp, cpus[0].GpuInfo)
	}
	return resp
}
  467. func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
  468. // Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
  469. var ldPaths []string
  470. gpuLibPaths := []string{}
  471. slog.Debug("Searching for GPU library", "name", baseLibName)
  472. // Start with our bundled libraries
  473. patterns := []string{}
  474. for _, d := range LibraryDirs() {
  475. patterns = append(patterns, filepath.Join(d, baseLibName))
  476. }
  477. switch runtime.GOOS {
  478. case "windows":
  479. ldPaths = strings.Split(os.Getenv("PATH"), ";")
  480. case "linux":
  481. ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
  482. default:
  483. return gpuLibPaths
  484. }
  485. // Then with whatever we find in the PATH/LD_LIBRARY_PATH
  486. for _, ldPath := range ldPaths {
  487. d, err := filepath.Abs(ldPath)
  488. if err != nil {
  489. continue
  490. }
  491. patterns = append(patterns, filepath.Join(d, baseLibName))
  492. }
  493. patterns = append(patterns, defaultPatterns...)
  494. slog.Debug("gpu library search", "globs", patterns)
  495. for _, pattern := range patterns {
  496. // Nvidia PhysX known to return bogus results
  497. if strings.Contains(pattern, "PhysX") {
  498. slog.Debug("skipping PhysX cuda library path", "path", pattern)
  499. continue
  500. }
  501. // Ignore glob discovery errors
  502. matches, _ := filepath.Glob(pattern)
  503. for _, match := range matches {
  504. // Resolve any links so we don't try the same lib multiple times
  505. // and weed out any dups across globs
  506. libPath := match
  507. tmp := match
  508. var err error
  509. for ; err == nil; tmp, err = os.Readlink(libPath) {
  510. if !filepath.IsAbs(tmp) {
  511. tmp = filepath.Join(filepath.Dir(libPath), tmp)
  512. }
  513. libPath = tmp
  514. }
  515. new := true
  516. for _, cmp := range gpuLibPaths {
  517. if cmp == libPath {
  518. new = false
  519. break
  520. }
  521. }
  522. if new {
  523. gpuLibPaths = append(gpuLibPaths, libPath)
  524. }
  525. }
  526. }
  527. slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
  528. return gpuLibPaths
  529. }
// Bootstrap the runtime library
// Returns: num devices, handle, libPath, error
//
// Tries each candidate path in order and returns on the first that
// initializes successfully; if all fail, the error from the last
// attempt is returned.
func loadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string, error) {
	var resp C.cudart_init_resp_t
	resp.ch.verbose = getVerboseState()
	var err error
	for _, libPath := range cudartLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib)) // freed at function exit, not per iteration
		C.cudart_init(lib, &resp)
		if resp.err != nil {
			err = fmt.Errorf("Unable to load cudart library %s: %s", libPath, C.GoString(resp.err))
			slog.Debug(err.Error())
			C.free(unsafe.Pointer(resp.err))
		} else {
			err = nil
			return int(resp.num_devices), &resp.ch, libPath, err
		}
	}
	return 0, nil, "", err
}
// Bootstrap the driver library
// Returns: num devices, handle, libPath, error
//
// Tries each candidate path in order and returns on the first success.
// Failures are classified by CUDA error code so the log level matches
// how actionable the problem is for the user.
func loadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string, error) {
	var resp C.nvcuda_init_resp_t
	resp.ch.verbose = getVerboseState()
	var err error
	for _, libPath := range nvcudaLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib)) // freed at function exit, not per iteration
		C.nvcuda_init(lib, &resp)
		if resp.err != nil {
			// Decide what log level based on the type of error message to help users understand why
			switch resp.cudaErr {
			case C.CUDA_ERROR_INSUFFICIENT_DRIVER, C.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH:
				err = fmt.Errorf("version mismatch between driver and cuda driver library - reboot or upgrade may be required: library %s", libPath)
				slog.Warn(err.Error())
			case C.CUDA_ERROR_NO_DEVICE:
				err = fmt.Errorf("no nvidia devices detected by library %s", libPath)
				slog.Info(err.Error())
			case C.CUDA_ERROR_UNKNOWN:
				err = fmt.Errorf("unknown error initializing cuda driver library %s: %s. see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information", libPath, C.GoString(resp.err))
				slog.Warn(err.Error())
			default:
				msg := C.GoString(resp.err)
				if strings.Contains(msg, "wrong ELF class") {
					// 32-bit library on a 64-bit system: expected noise, not an error.
					slog.Debug("skipping 32bit library", "library", libPath)
				} else {
					err = fmt.Errorf("Unable to load cudart library %s: %s", libPath, C.GoString(resp.err))
					slog.Info(err.Error())
				}
			}
			C.free(unsafe.Pointer(resp.err))
		} else {
			err = nil
			return int(resp.num_devices), &resp.ch, libPath, err
		}
	}
	return 0, nil, "", err
}
// Bootstrap the management library
// Returns: handle, libPath, error
//
// Tries each candidate path in order and returns on the first that
// initializes successfully; if all fail, the error from the last
// attempt is returned.
func loadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string, error) {
	var resp C.nvml_init_resp_t
	resp.ch.verbose = getVerboseState()
	var err error
	for _, libPath := range nvmlLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib)) // freed at function exit, not per iteration
		C.nvml_init(lib, &resp)
		if resp.err != nil {
			err = fmt.Errorf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err))
			slog.Info(err.Error())
			C.free(unsafe.Pointer(resp.err))
		} else {
			err = nil
			return &resp.ch, libPath, err
		}
	}
	return nil, "", err
}
// bootstrap the Intel GPU library
// Returns: num devices, handle, libPath, error
//
// Tries each candidate path in order; on success the device count is
// summed across all drivers the library reports.
func loadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string, error) {
	var resp C.oneapi_init_resp_t
	num_devices := 0
	resp.oh.verbose = getVerboseState()
	var err error
	for _, libPath := range oneapiLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib)) // freed at function exit, not per iteration
		C.oneapi_init(lib, &resp)
		if resp.err != nil {
			err = fmt.Errorf("Unable to load oneAPI management library %s: %s", libPath, C.GoString(resp.err))
			slog.Debug(err.Error())
			C.free(unsafe.Pointer(resp.err))
		} else {
			err = nil
			for i := range resp.oh.num_drivers {
				num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
			}
			return num_devices, &resp.oh, libPath, err
		}
	}
	return 0, nil, "", err
}
  636. func getVerboseState() C.uint16_t {
  637. if envconfig.Debug() {
  638. return C.uint16_t(1)
  639. }
  640. return C.uint16_t(0)
  641. }
  642. // Given the list of GPUs this instantiation is targeted for,
  643. // figure out the visible devices environment variable
  644. //
  645. // If different libraries are detected, the first one is what we use
  646. func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
  647. if len(l) == 0 {
  648. return "", ""
  649. }
  650. switch l[0].Library {
  651. case "cuda":
  652. return cudaGetVisibleDevicesEnv(l)
  653. case "rocm":
  654. return rocmGetVisibleDevicesEnv(l)
  655. case "oneapi":
  656. return oneapiGetVisibleDevicesEnv(l)
  657. default:
  658. slog.Debug("no filter required for library " + l[0].Library)
  659. return "", ""
  660. }
  661. }
  662. func LibraryDirs() []string {
  663. // dependencies can exist wherever we found the runners (e.g. build tree for developers) and relative to the executable
  664. // This can be simplified once we no longer carry runners as payloads
  665. exe, err := os.Executable()
  666. if err != nil {
  667. slog.Warn("failed to lookup executable path", "error", err)
  668. return nil
  669. }
  670. lib := filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama")
  671. if _, err := os.Stat(lib); err != nil {
  672. return nil
  673. }
  674. return []string{lib}
  675. }
  676. func GetSystemInfo() SystemInfo {
  677. gpus := GetGPUInfo()
  678. gpuMutex.Lock()
  679. defer gpuMutex.Unlock()
  680. discoveryErrors := []string{}
  681. for _, err := range bootstrapErrors {
  682. discoveryErrors = append(discoveryErrors, err.Error())
  683. }
  684. if len(gpus) == 1 && gpus[0].Library == "cpu" {
  685. gpus = []GpuInfo{}
  686. }
  687. return SystemInfo{
  688. System: cpus[0],
  689. GPUs: gpus,
  690. UnsupportedGPUs: unsupportedGPUs,
  691. DiscoveryErrors: discoveryErrors,
  692. }
  693. }