gpu.go 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671
  1. //go:build linux || windows
  2. package gpu
  3. /*
  4. #cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
  5. #cgo windows LDFLAGS: -lpthread
  6. #include "gpu_info.h"
  7. */
  8. import "C"
import (
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
	"runtime"
	"slices"
	"strings"
	"sync"
	"unsafe"

	"github.com/ollama/ollama/envconfig"
	"github.com/ollama/ollama/format"
)
// cudaHandles aggregates the optional NVIDIA management-library handles
// produced by initCudaHandles. At most one of cudart/nvcuda is typically
// populated; deviceCount is filled by whichever of those two loaded
// (the nvml-only short-circuit path leaves it at zero).
type cudaHandles struct {
	deviceCount int
	cudart      *C.cudart_handle_t
	nvcuda      *C.nvcuda_handle_t
	nvml        *C.nvml_handle_t
}
// oneapiHandles wraps the Intel oneAPI management-library handle together
// with the total device count summed across all of its drivers.
type oneapiHandles struct {
	oneapi      *C.oneapi_handle_t
	deviceCount int
}
const (
	// Per-GPU VRAM reserve recorded as MinimumMemory on each discovered
	// CUDA/ROCm GPU during bootstrap.
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
	// TODO OneAPI minimum memory
)
var (
	// gpuMutex guards all of the package-level discovery state below.
	gpuMutex sync.Mutex
	// bootstrapped is set once the first full discovery pass completes;
	// later GetGPUInfo calls then only refresh free-memory figures.
	bootstrapped  bool
	cpuCapability CPUCapability
	cpus          []CPUInfo
	cudaGPUs      []CudaGPUInfo
	// Cached library paths so subsequent handle (re)initialization can
	// short-circuit the filesystem search.
	nvcudaLibPath string
	cudartLibPath string
	oneapiLibPath string
	nvmlLibPath   string
	rocmGPUs      []RocmGPUInfo
	oneapiGPUs    []OneapiGPUInfo
)
// With our current CUDA compile flags, older than 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0}

// Minimum supported AMD compute version — presumably compared against the
// GFX major version by the ROCm discovery code (not visible in this file).
var RocmComputeMin = 9

// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU
// Note: gpuMutex must already be held
//
// initCudaHandles loads an NVIDIA management library and returns the
// resulting handles. A previously cached library path (nvml, nvcuda, or
// cudart, in that order) short-circuits the search. On a cold start it
// first tries to load NVML (used for free-memory queries; does not set
// deviceCount), then nvcuda, then cudart — the latter two also supply
// the device count and cause an early return on success.
func initCudaHandles() *cudaHandles {
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
	cHandles := &cudaHandles{}
	// Short Circuit if we already know which library to use
	if nvmlLibPath != "" {
		cHandles.nvml, _ = LoadNVMLMgmt([]string{nvmlLibPath})
		return cHandles
	}
	if nvcudaLibPath != "" {
		cHandles.deviceCount, cHandles.nvcuda, _ = LoadNVCUDAMgmt([]string{nvcudaLibPath})
		return cHandles
	}
	if cudartLibPath != "" {
		cHandles.deviceCount, cHandles.cudart, _ = LoadCUDARTMgmt([]string{cudartLibPath})
		return cHandles
	}

	slog.Debug("searching for GPU discovery libraries for NVIDIA")
	var cudartMgmtPatterns []string

	// Aligned with driver, we can't carry as payloads
	nvcudaMgmtPatterns := NvcudaGlobs

	if runtime.GOOS == "windows" {
		localAppData := os.Getenv("LOCALAPPDATA")
		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", CudartMgmtName)}
	}
	tmpDir, _ := PayloadsDir()
	if tmpDir != "" {
		// TODO - add "payloads" for subprocess
		// NOTE(review): this replaces (does not append to) the Windows
		// LOCALAPPDATA pattern set above — confirm that is intended.
		cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", CudartMgmtName)}
	}
	cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)

	if len(NvmlGlobs) > 0 {
		nvmlLibPaths := FindGPULibs(NvmlMgmtName, NvmlGlobs)
		if len(nvmlLibPaths) > 0 {
			nvml, libPath := LoadNVMLMgmt(nvmlLibPaths)
			if nvml != nil {
				slog.Debug("nvidia-ml loaded", "library", libPath)
				cHandles.nvml = nvml
				nvmlLibPath = libPath
			}
		}
	}

	nvcudaLibPaths := FindGPULibs(NvcudaMgmtName, nvcudaMgmtPatterns)
	if len(nvcudaLibPaths) > 0 {
		deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
		if nvcuda != nil {
			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
			cHandles.nvcuda = nvcuda
			cHandles.deviceCount = deviceCount
			nvcudaLibPath = libPath
			return cHandles
		}
	}

	cudartLibPaths := FindGPULibs(CudartMgmtName, cudartMgmtPatterns)
	if len(cudartLibPaths) > 0 {
		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
		if cudart != nil {
			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
			cHandles.cudart = cudart
			cHandles.deviceCount = deviceCount
			cudartLibPath = libPath
			return cHandles
		}
	}
	return cHandles
}
  120. // Note: gpuMutex must already be held
  121. func initOneAPIHandles() *oneapiHandles {
  122. oHandles := &oneapiHandles{}
  123. // Short Circuit if we already know which library to use
  124. if oneapiLibPath != "" {
  125. oHandles.deviceCount, oHandles.oneapi, _ = LoadOneapiMgmt([]string{oneapiLibPath})
  126. return oHandles
  127. }
  128. oneapiLibPaths := FindGPULibs(OneapiMgmtName, OneapiGlobs)
  129. if len(oneapiLibPaths) > 0 {
  130. oHandles.deviceCount, oHandles.oneapi, oneapiLibPath = LoadOneapiMgmt(oneapiLibPaths)
  131. }
  132. return oHandles
  133. }
  134. func GetCPUInfo() GpuInfoList {
  135. gpuMutex.Lock()
  136. if !bootstrapped {
  137. gpuMutex.Unlock()
  138. GetGPUInfo()
  139. } else {
  140. gpuMutex.Unlock()
  141. }
  142. return GpuInfoList{cpus[0].GpuInfo}
  143. }
// GetGPUInfo performs (and caches) GPU discovery across CUDA, ROCm and
// oneAPI backends, returning one GpuInfo per usable GPU — or the CPU
// entry when none are found. The first call runs the full bootstrap
// (capability checks, library loading, per-device probing); subsequent
// calls only refresh free-memory figures for the GPUs found earlier.
func GetGPUInfo() GpuInfoList {
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
	needRefresh := true
	var cHandles *cudaHandles
	var oHandles *oneapiHandles
	// Release any native library handles opened during this call.
	defer func() {
		if cHandles != nil {
			if cHandles.cudart != nil {
				C.cudart_release(*cHandles.cudart)
			}
			if cHandles.nvcuda != nil {
				C.nvcuda_release(*cHandles.nvcuda)
			}
			if cHandles.nvml != nil {
				C.nvml_release(*cHandles.nvml)
			}
		}
		if oHandles != nil {
			if oHandles.oneapi != nil {
				// TODO - is this needed?
				C.oneapi_release(*oHandles.oneapi)
			}
		}
	}()

	if !bootstrapped {
		slog.Info("looking for compatible GPUs")
		needRefresh = false
		cpuCapability = GetCPUCapability()
		var memInfo C.mem_info_t

		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		}
		// The CPU entry always exists at index 0, even on lookup failure.
		cpus = []CPUInfo{
			{
				GpuInfo: GpuInfo{
					memInfo: mem,
					Library: "cpu",
					Variant: cpuCapability.String(),
					ID:      "0",
				},
			},
		}

		// Fallback to CPU mode if we're lacking required vector extensions on x86
		if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
			slog.Warn("CPU does not have minimum vector extensions, GPU inference disabled", "required", GPURunnerCPUCapability, "detected", cpuCapability)
			bootstrapped = true
			// No need to do any GPU discovery, since we can't run on them
			return GpuInfoList{cpus[0].GpuInfo}
		}

		depPath := LibraryDir()

		// Load ALL libraries
		cHandles = initCudaHandles()

		// NVIDIA
		for i := range cHandles.deviceCount {
			if cHandles.cudart != nil || cHandles.nvcuda != nil {
				gpuInfo := CudaGPUInfo{
					GpuInfo: GpuInfo{
						Library: "cuda",
					},
					index: i,
				}
				var driverMajor int
				var driverMinor int
				// Probe via whichever library loaded; only nvcuda exposes
				// the driver version.
				if cHandles.cudart != nil {
					C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
				} else {
					C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
					driverMajor = int(cHandles.nvcuda.driver_major)
					driverMinor = int(cHandles.nvcuda.driver_minor)
				}
				if memInfo.err != nil {
					slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
					C.free(unsafe.Pointer(memInfo.err))
					continue
				}
				// Skip GPUs below the minimum supported compute capability.
				if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
					slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
					continue
				}
				gpuInfo.TotalMemory = uint64(memInfo.total)
				gpuInfo.FreeMemory = uint64(memInfo.free)
				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
				gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
				gpuInfo.computeMajor = int(memInfo.major)
				gpuInfo.computeMinor = int(memInfo.minor)
				gpuInfo.MinimumMemory = cudaMinimumMemory
				gpuInfo.DriverMajor = driverMajor
				gpuInfo.DriverMinor = driverMinor
				variant := cudaVariant(gpuInfo)
				if depPath != "" {
					gpuInfo.DependencyPath = depPath
					// Check for variant specific directory
					if variant != "" {
						if _, err := os.Stat(filepath.Join(depPath, "cuda_"+variant)); err == nil {
							gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+variant)
						}
					}
				}
				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
				gpuInfo.Variant = variant

				// query the management library as well so we can record any skew between the two
				// which represents overhead on the GPU we must set aside on subsequent updates
				if cHandles.nvml != nil {
					C.nvml_get_free(*cHandles.nvml, C.int(gpuInfo.index), &memInfo.free, &memInfo.total, &memInfo.used)
					if memInfo.err != nil {
						slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
						C.free(unsafe.Pointer(memInfo.err))
					} else {
						if memInfo.free != 0 && uint64(memInfo.free) > gpuInfo.FreeMemory {
							gpuInfo.OSOverhead = uint64(memInfo.free) - gpuInfo.FreeMemory
							slog.Info("detected OS VRAM overhead",
								"id", gpuInfo.ID,
								"library", gpuInfo.Library,
								"compute", gpuInfo.Compute,
								"driver", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor),
								"name", gpuInfo.Name,
								"overhead", format.HumanBytes2(gpuInfo.OSOverhead),
							)
						}
					}
				}

				// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
				cudaGPUs = append(cudaGPUs, gpuInfo)
			}
		}

		// Intel
		if envconfig.IntelGPU() {
			oHandles = initOneAPIHandles()
			if oHandles != nil && oHandles.oneapi != nil {
				for d := range oHandles.oneapi.num_drivers {
					// NOTE(review): unreachable — the enclosing if already
					// guarantees oHandles.oneapi != nil.
					if oHandles.oneapi == nil {
						// shouldn't happen
						slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
						continue
					}
					devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
					for i := range devCount {
						gpuInfo := OneapiGPUInfo{
							GpuInfo: GpuInfo{
								Library: "oneapi",
							},
							driverIndex: int(d),
							gpuIndex:    int(i),
						}
						// TODO - split bootstrapping from updating free memory
						C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
						// TODO - convert this to MinimumMemory based on testing...
						var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
						memInfo.free = C.uint64_t(totalFreeMem)
						gpuInfo.TotalMemory = uint64(memInfo.total)
						gpuInfo.FreeMemory = uint64(memInfo.free)
						gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
						gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
						gpuInfo.DependencyPath = depPath
						oneapiGPUs = append(oneapiGPUs, gpuInfo)
					}
				}
			}
		}

		rocmGPUs = AMDGetGPUInfo()
		bootstrapped = true
		if len(cudaGPUs) == 0 && len(rocmGPUs) == 0 && len(oneapiGPUs) == 0 {
			slog.Info("no compatible GPUs were discovered")
		}
	}

	// For detected GPUs, load library if not loaded

	// Refresh free memory usage
	if needRefresh {
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		} else {
			slog.Debug("updating system memory data",
				slog.Group(
					"before",
					"total", format.HumanBytes2(cpus[0].TotalMemory),
					"free", format.HumanBytes2(cpus[0].FreeMemory),
					"free_swap", format.HumanBytes2(cpus[0].FreeSwap),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(mem.TotalMemory),
					"free", format.HumanBytes2(mem.FreeMemory),
					"free_swap", format.HumanBytes2(mem.FreeSwap),
				),
			)
			cpus[0].FreeMemory = mem.FreeMemory
			cpus[0].FreeSwap = mem.FreeSwap
		}

		var memInfo C.mem_info_t
		// Re-open the CUDA handles if the bootstrap pass's handles were
		// already released (i.e. this is a later refresh-only call).
		if cHandles == nil && len(cudaGPUs) > 0 {
			cHandles = initCudaHandles()
		}
		for i, gpu := range cudaGPUs {
			// Prefer NVML for free-memory queries, falling back to the
			// library that bootstrapped the device.
			if cHandles.nvml != nil {
				C.nvml_get_free(*cHandles.nvml, C.int(gpu.index), &memInfo.free, &memInfo.total, &memInfo.used)
			} else if cHandles.cudart != nil {
				C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
			} else if cHandles.nvcuda != nil {
				C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
				memInfo.used = memInfo.total - memInfo.free
			} else {
				// shouldn't happen
				slog.Warn("no valid cuda library loaded to refresh vram usage")
				break
			}
			if memInfo.err != nil {
				slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
				C.free(unsafe.Pointer(memInfo.err))
				continue
			}
			if memInfo.free == 0 {
				slog.Warn("error looking up nvidia GPU memory")
				continue
			}
			if cHandles.nvml != nil && gpu.OSOverhead > 0 {
				// When using the management library update based on recorded overhead
				memInfo.free -= C.uint64_t(gpu.OSOverhead)
			}
			slog.Debug("updating cuda memory data",
				"gpu", gpu.ID,
				"name", gpu.Name,
				"overhead", format.HumanBytes2(gpu.OSOverhead),
				slog.Group(
					"before",
					"total", format.HumanBytes2(gpu.TotalMemory),
					"free", format.HumanBytes2(gpu.FreeMemory),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(uint64(memInfo.total)),
					"free", format.HumanBytes2(uint64(memInfo.free)),
					"used", format.HumanBytes2(uint64(memInfo.used)),
				),
			)
			cudaGPUs[i].FreeMemory = uint64(memInfo.free)
		}

		if oHandles == nil && len(oneapiGPUs) > 0 {
			oHandles = initOneAPIHandles()
		}
		for i, gpu := range oneapiGPUs {
			if oHandles.oneapi == nil {
				// shouldn't happen
				slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
				continue
			}
			C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
			// TODO - convert this to MinimumMemory based on testing...
			var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
			memInfo.free = C.uint64_t(totalFreeMem)
			oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
		}

		err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
		if err != nil {
			slog.Debug("problem refreshing ROCm free memory", "error", err)
		}
	}

	// Flatten the per-backend lists into the response, falling back to
	// the CPU entry when no GPUs survived discovery.
	resp := []GpuInfo{}
	for _, gpu := range cudaGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range rocmGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range oneapiGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	if len(resp) == 0 {
		resp = append(resp, cpus[0].GpuInfo)
	}
	return resp
}
  420. func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
  421. // Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
  422. var ldPaths []string
  423. gpuLibPaths := []string{}
  424. slog.Debug("Searching for GPU library", "name", baseLibName)
  425. // Start with our bundled libraries
  426. patterns := []string{filepath.Join(LibraryDir(), baseLibName)}
  427. switch runtime.GOOS {
  428. case "windows":
  429. ldPaths = strings.Split(os.Getenv("PATH"), ";")
  430. case "linux":
  431. ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
  432. default:
  433. return gpuLibPaths
  434. }
  435. // Then with whatever we find in the PATH/LD_LIBRARY_PATH
  436. for _, ldPath := range ldPaths {
  437. d, err := filepath.Abs(ldPath)
  438. if err != nil {
  439. continue
  440. }
  441. patterns = append(patterns, filepath.Join(d, baseLibName))
  442. }
  443. patterns = append(patterns, defaultPatterns...)
  444. slog.Debug("gpu library search", "globs", patterns)
  445. for _, pattern := range patterns {
  446. // Nvidia PhysX known to return bogus results
  447. if strings.Contains(pattern, "PhysX") {
  448. slog.Debug("skipping PhysX cuda library path", "path", pattern)
  449. continue
  450. }
  451. // Ignore glob discovery errors
  452. matches, _ := filepath.Glob(pattern)
  453. for _, match := range matches {
  454. // Resolve any links so we don't try the same lib multiple times
  455. // and weed out any dups across globs
  456. libPath := match
  457. tmp := match
  458. var err error
  459. for ; err == nil; tmp, err = os.Readlink(libPath) {
  460. if !filepath.IsAbs(tmp) {
  461. tmp = filepath.Join(filepath.Dir(libPath), tmp)
  462. }
  463. libPath = tmp
  464. }
  465. new := true
  466. for _, cmp := range gpuLibPaths {
  467. if cmp == libPath {
  468. new = false
  469. break
  470. }
  471. }
  472. if new {
  473. gpuLibPaths = append(gpuLibPaths, libPath)
  474. }
  475. }
  476. }
  477. slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
  478. return gpuLibPaths
  479. }
  480. func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
  481. var resp C.cudart_init_resp_t
  482. resp.ch.verbose = getVerboseState()
  483. for _, libPath := range cudartLibPaths {
  484. lib := C.CString(libPath)
  485. defer C.free(unsafe.Pointer(lib))
  486. C.cudart_init(lib, &resp)
  487. if resp.err != nil {
  488. slog.Debug("Unable to load cudart", "library", libPath, "error", C.GoString(resp.err))
  489. C.free(unsafe.Pointer(resp.err))
  490. } else {
  491. return int(resp.num_devices), &resp.ch, libPath
  492. }
  493. }
  494. return 0, nil, ""
  495. }
  496. func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
  497. var resp C.nvcuda_init_resp_t
  498. resp.ch.verbose = getVerboseState()
  499. for _, libPath := range nvcudaLibPaths {
  500. lib := C.CString(libPath)
  501. defer C.free(unsafe.Pointer(lib))
  502. C.nvcuda_init(lib, &resp)
  503. if resp.err != nil {
  504. // Decide what log level based on the type of error message to help users understand why
  505. msg := C.GoString(resp.err)
  506. switch resp.cudaErr {
  507. case C.CUDA_ERROR_INSUFFICIENT_DRIVER, C.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH:
  508. slog.Warn("version mismatch between driver and cuda driver library - reboot or upgrade may be required", "library", libPath, "error", msg)
  509. case C.CUDA_ERROR_NO_DEVICE:
  510. slog.Info("no nvidia devices detected", "library", libPath)
  511. case C.CUDA_ERROR_UNKNOWN:
  512. slog.Warn("unknown error initializing cuda driver library", "library", libPath, "error", msg)
  513. slog.Warn("see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information")
  514. default:
  515. if strings.Contains(msg, "wrong ELF class") {
  516. slog.Debug("skipping 32bit library", "library", libPath)
  517. } else {
  518. slog.Info("unable to load cuda driver library", "library", libPath, "error", msg)
  519. }
  520. }
  521. C.free(unsafe.Pointer(resp.err))
  522. } else {
  523. return int(resp.num_devices), &resp.ch, libPath
  524. }
  525. }
  526. return 0, nil, ""
  527. }
  528. func LoadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string) {
  529. var resp C.nvml_init_resp_t
  530. resp.ch.verbose = getVerboseState()
  531. for _, libPath := range nvmlLibPaths {
  532. lib := C.CString(libPath)
  533. defer C.free(unsafe.Pointer(lib))
  534. C.nvml_init(lib, &resp)
  535. if resp.err != nil {
  536. slog.Info(fmt.Sprintf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err)))
  537. C.free(unsafe.Pointer(resp.err))
  538. } else {
  539. return &resp.ch, libPath
  540. }
  541. }
  542. return nil, ""
  543. }
  544. func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
  545. var resp C.oneapi_init_resp_t
  546. num_devices := 0
  547. resp.oh.verbose = getVerboseState()
  548. for _, libPath := range oneapiLibPaths {
  549. lib := C.CString(libPath)
  550. defer C.free(unsafe.Pointer(lib))
  551. C.oneapi_init(lib, &resp)
  552. if resp.err != nil {
  553. slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err))
  554. C.free(unsafe.Pointer(resp.err))
  555. } else {
  556. for i := range resp.oh.num_drivers {
  557. num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
  558. }
  559. return num_devices, &resp.oh, libPath
  560. }
  561. }
  562. return 0, nil, ""
  563. }
  564. func getVerboseState() C.uint16_t {
  565. if envconfig.Debug() {
  566. return C.uint16_t(1)
  567. }
  568. return C.uint16_t(0)
  569. }
  570. // Given the list of GPUs this instantiation is targeted for,
  571. // figure out the visible devices environment variable
  572. //
  573. // If different libraries are detected, the first one is what we use
  574. func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
  575. if len(l) == 0 {
  576. return "", ""
  577. }
  578. switch l[0].Library {
  579. case "cuda":
  580. return cudaGetVisibleDevicesEnv(l)
  581. case "rocm":
  582. return rocmGetVisibleDevicesEnv(l)
  583. case "oneapi":
  584. return oneapiGetVisibleDevicesEnv(l)
  585. default:
  586. slog.Debug("no filter required for library " + l[0].Library)
  587. return "", ""
  588. }
  589. }
  590. func LibraryDir() string {
  591. // On Windows/linux we bundle the dependencies at the same level as the executable
  592. appExe, err := os.Executable()
  593. if err != nil {
  594. slog.Warn("failed to lookup executable path", "error", err)
  595. }
  596. cwd, err := os.Getwd()
  597. if err != nil {
  598. slog.Warn("failed to lookup working directory", "error", err)
  599. }
  600. // Scan for any of our dependeices, and pick first match
  601. for _, root := range []string{filepath.Dir(appExe), filepath.Join(filepath.Dir(appExe), envconfig.LibRelativeToExe()), cwd} {
  602. libDep := filepath.Join("lib", "ollama")
  603. if _, err := os.Stat(filepath.Join(root, libDep)); err == nil {
  604. return filepath.Join(root, libDep)
  605. }
  606. // Developer mode, local build
  607. if _, err := os.Stat(filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil {
  608. return filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)
  609. }
  610. if _, err := os.Stat(filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil {
  611. return filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)
  612. }
  613. }
  614. slog.Warn("unable to locate gpu dependency libraries")
  615. return ""
  616. }