gpu.go

//go:build linux || windows

package gpu

/*
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -lpthread

#include "gpu_info.h"
*/
import "C"

import (
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
	"regexp"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"unsafe"

	"github.com/ollama/ollama/envconfig"
	"github.com/ollama/ollama/format"
)

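// cudaHandles tracks which NVIDIA management libraries (NVML, the CUDA driver API, or the CUDA runtime) were loaded for discovery, along with the device count they reported.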
type cudaHandles struct {
	deviceCount int
	cudart      *C.cudart_handle_t
	nvcuda      *C.nvcuda_handle_t
	nvml        *C.nvml_handle_t
}

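// oneapiHandles tracks the Intel oneAPI management library handle and the device count it reported.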
type oneapiHandles struct {
	oneapi      *C.oneapi_handle_t
	deviceCount int
}

const (
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
	// TODO OneAPI minimum memory
)

var (
	gpuMutex      sync.Mutex
	bootstrapped  bool
	cpuCapability CPUCapability
	cpus          []CPUInfo
	cudaGPUs      []CudaGPUInfo
	nvcudaLibPath string
	cudartLibPath string
	oneapiLibPath string
	nvmlLibPath   string
	rocmGPUs      []RocmGPUInfo
	oneapiGPUs    []OneapiGPUInfo
)

// With our current CUDA compile flags, anything older than compute capability 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0}

var RocmComputeMin = 9

// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU

// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra string = os.Getenv("JETSON_JETPACK")

// Note: gpuMutex must already be held
func initCudaHandles() *cudaHandles {
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing

	cHandles := &cudaHandles{}
	// Short Circuit if we already know which library to use
	if nvmlLibPath != "" {
		cHandles.nvml, _ = LoadNVMLMgmt([]string{nvmlLibPath})
		return cHandles
	}
	if nvcudaLibPath != "" {
		cHandles.deviceCount, cHandles.nvcuda, _ = LoadNVCUDAMgmt([]string{nvcudaLibPath})
		return cHandles
	}
	if cudartLibPath != "" {
		cHandles.deviceCount, cHandles.cudart, _ = LoadCUDARTMgmt([]string{cudartLibPath})
		return cHandles
	}

	slog.Debug("searching for GPU discovery libraries for NVIDIA")

	var cudartMgmtPatterns []string

	// Aligned with driver, we can't carry as payloads
	nvcudaMgmtPatterns := NvcudaGlobs

	if runtime.GOOS == "windows" {
		localAppData := os.Getenv("LOCALAPPDATA")
		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", CudartMgmtName)}
	}
	tmpDir, _ := PayloadsDir()
	if tmpDir != "" {
		// TODO - add "payloads" for subprocess
		cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", CudartMgmtName)}
	}
	cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)

	if len(NvmlGlobs) > 0 {
		nvmlLibPaths := FindGPULibs(NvmlMgmtName, NvmlGlobs)
		if len(nvmlLibPaths) > 0 {
			nvml, libPath := LoadNVMLMgmt(nvmlLibPaths)
			if nvml != nil {
				slog.Debug("nvidia-ml loaded", "library", libPath)
				cHandles.nvml = nvml
				nvmlLibPath = libPath
			}
		}
	}

	nvcudaLibPaths := FindGPULibs(NvcudaMgmtName, nvcudaMgmtPatterns)
	if len(nvcudaLibPaths) > 0 {
		deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
		if nvcuda != nil {
			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
			cHandles.nvcuda = nvcuda
			cHandles.deviceCount = deviceCount
			nvcudaLibPath = libPath
			return cHandles
		}
	}

	cudartLibPaths := FindGPULibs(CudartMgmtName, cudartMgmtPatterns)
	if len(cudartLibPaths) > 0 {
		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
		if cudart != nil {
			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
			cHandles.cudart = cudart
			cHandles.deviceCount = deviceCount
			cudartLibPath = libPath
			return cHandles
		}
	}

	return cHandles
}

// Note: gpuMutex must already be held
func initOneAPIHandles() *oneapiHandles {
	oHandles := &oneapiHandles{}

	// Short Circuit if we already know which library to use
	if oneapiLibPath != "" {
		oHandles.deviceCount, oHandles.oneapi, _ = LoadOneapiMgmt([]string{oneapiLibPath})
		return oHandles
	}

	oneapiLibPaths := FindGPULibs(OneapiMgmtName, OneapiGlobs)
	if len(oneapiLibPaths) > 0 {
		oHandles.deviceCount, oHandles.oneapi, oneapiLibPath = LoadOneapiMgmt(oneapiLibPaths)
	}

	return oHandles
}

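// GetCPUInfo returns the CPU entry captured during discovery, bootstrapping via GetGPUInfo first if discovery has not run yet.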
func GetCPUInfo() GpuInfoList {
	gpuMutex.Lock()
	if !bootstrapped {
		gpuMutex.Unlock()
		GetGPUInfo()
	} else {
		gpuMutex.Unlock()
	}
	return GpuInfoList{cpus[0].GpuInfo}
}

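// GetGPUInfo performs GPU discovery on first use (CUDA, ROCm, and optionally oneAPI), then refreshes free-memory figures on subsequent calls. It falls back to a single CPU entry when no compatible GPU is found.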
func GetGPUInfo() GpuInfoList {
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
	needRefresh := true
	var cHandles *cudaHandles
	var oHandles *oneapiHandles
	defer func() {
		if cHandles != nil {
			if cHandles.cudart != nil {
				C.cudart_release(*cHandles.cudart)
			}
			if cHandles.nvcuda != nil {
				C.nvcuda_release(*cHandles.nvcuda)
			}
			if cHandles.nvml != nil {
				C.nvml_release(*cHandles.nvml)
			}
		}
		if oHandles != nil {
			if oHandles.oneapi != nil {
				// TODO - is this needed?
				C.oneapi_release(*oHandles.oneapi)
			}
		}
	}()

	if !bootstrapped {
		slog.Info("looking for compatible GPUs")
		needRefresh = false
		cpuCapability = GetCPUCapability()
		var memInfo C.mem_info_t
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		}
		cpus = []CPUInfo{
			{
				GpuInfo: GpuInfo{
					memInfo: mem,
					Library: "cpu",
					Variant: cpuCapability.String(),
					ID:      "0",
				},
			},
		}

		// Fallback to CPU mode if we're lacking required vector extensions on x86
		if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
			slog.Warn("CPU does not have minimum vector extensions, GPU inference disabled", "required", GPURunnerCPUCapability, "detected", cpuCapability)
			bootstrapped = true
			// No need to do any GPU discovery, since we can't run on them
			return GpuInfoList{cpus[0].GpuInfo}
		}

		depPath := GetDepDir()

		var cudaVariant string
		if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
			if CudaTegra != "" {
				ver := strings.Split(CudaTegra, ".")
				if len(ver) > 0 {
					cudaVariant = "jetpack" + ver[0]
				}
			} else if data, err := os.ReadFile("/etc/nv_tegra_release"); err == nil {
				r := regexp.MustCompile(` R(\d+) `)
				m := r.FindSubmatch(data)
				if len(m) != 2 {
					slog.Info("Unexpected format for /etc/nv_tegra_release. Set JETSON_JETPACK to select version")
				} else {
					if l4t, err := strconv.Atoi(string(m[1])); err == nil {
						// Note: mapping from L4t -> JP is inconsistent (can't just subtract 30)
						// https://developer.nvidia.com/embedded/jetpack-archive
						switch l4t {
						case 35:
							cudaVariant = "jetpack5"
						case 36:
							cudaVariant = "jetpack6"
						default:
							slog.Info("unsupported L4T version", "nv_tegra_release", string(data))
						}
					}
				}
			}
		}

		// Load ALL libraries
		cHandles = initCudaHandles()

		// NVIDIA
		for i := range cHandles.deviceCount {
			if cHandles.cudart != nil || cHandles.nvcuda != nil {
				gpuInfo := CudaGPUInfo{
					GpuInfo: GpuInfo{
						Library: "cuda",
						Variant: cudaVariant,
					},
					index: i,
				}
				var driverMajor int
				var driverMinor int
				if cHandles.cudart != nil {
					C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
				} else {
					C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
					driverMajor = int(cHandles.nvcuda.driver_major)
					driverMinor = int(cHandles.nvcuda.driver_minor)
				}
				if memInfo.err != nil {
					slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
					C.free(unsafe.Pointer(memInfo.err))
					continue
				}
				if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
					slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
					continue
				}
				gpuInfo.TotalMemory = uint64(memInfo.total)
				gpuInfo.FreeMemory = uint64(memInfo.free)
				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
				gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
				gpuInfo.MinimumMemory = cudaMinimumMemory
				if depPath != "" {
					gpuInfo.DependencyPath = depPath
					// Check for variant specific directory
					if cudaVariant != "" {
						if _, err := os.Stat(filepath.Join(depPath, "cuda_"+cudaVariant)); err == nil {
							gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+cudaVariant)
						}
					}
				}
				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
				gpuInfo.DriverMajor = driverMajor
				gpuInfo.DriverMinor = driverMinor

				// query the management library as well so we can record any skew between the two
				// which represents overhead on the GPU we must set aside on subsequent updates
				if cHandles.nvml != nil {
					C.nvml_get_free(*cHandles.nvml, C.int(gpuInfo.index), &memInfo.free, &memInfo.total, &memInfo.used)
					if memInfo.err != nil {
						slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
						C.free(unsafe.Pointer(memInfo.err))
					} else {
						if memInfo.free != 0 && uint64(memInfo.free) > gpuInfo.FreeMemory {
							gpuInfo.OSOverhead = uint64(memInfo.free) - gpuInfo.FreeMemory
							slog.Info("detected OS VRAM overhead",
								"id", gpuInfo.ID,
								"library", gpuInfo.Library,
								"compute", gpuInfo.Compute,
								"driver", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor),
								"name", gpuInfo.Name,
								"overhead", format.HumanBytes2(gpuInfo.OSOverhead),
							)
						}
					}
				}

				// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
				cudaGPUs = append(cudaGPUs, gpuInfo)
			}
		}

		// Intel
		if envconfig.IntelGPU() {
			oHandles = initOneAPIHandles()
			if oHandles != nil && oHandles.oneapi != nil {
				for d := range oHandles.oneapi.num_drivers {
					if oHandles.oneapi == nil {
						// shouldn't happen
						slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
						continue
					}
					devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
					for i := range devCount {
						gpuInfo := OneapiGPUInfo{
							GpuInfo: GpuInfo{
								Library: "oneapi",
							},
							driverIndex: int(d),
							gpuIndex:    int(i),
						}
						// TODO - split bootstrapping from updating free memory
						C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
						// TODO - convert this to MinimumMemory based on testing...
						var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
						memInfo.free = C.uint64_t(totalFreeMem)
						gpuInfo.TotalMemory = uint64(memInfo.total)
						gpuInfo.FreeMemory = uint64(memInfo.free)
						gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
						gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
						gpuInfo.DependencyPath = depPath
						oneapiGPUs = append(oneapiGPUs, gpuInfo)
					}
				}
			}
		}

		rocmGPUs = AMDGetGPUInfo()
		bootstrapped = true
		if len(cudaGPUs) == 0 && len(rocmGPUs) == 0 && len(oneapiGPUs) == 0 {
			slog.Info("no compatible GPUs were discovered")
		}
	}

	// For detected GPUs, load library if not loaded

	// Refresh free memory usage
	if needRefresh {
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		} else {
			slog.Debug("updating system memory data",
				slog.Group(
					"before",
					"total", format.HumanBytes2(cpus[0].TotalMemory),
					"free", format.HumanBytes2(cpus[0].FreeMemory),
					"free_swap", format.HumanBytes2(cpus[0].FreeSwap),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(mem.TotalMemory),
					"free", format.HumanBytes2(mem.FreeMemory),
					"free_swap", format.HumanBytes2(mem.FreeSwap),
				),
			)
			cpus[0].FreeMemory = mem.FreeMemory
			cpus[0].FreeSwap = mem.FreeSwap
		}

		var memInfo C.mem_info_t
		if cHandles == nil && len(cudaGPUs) > 0 {
			cHandles = initCudaHandles()
		}
		for i, gpu := range cudaGPUs {
			if cHandles.nvml != nil {
				C.nvml_get_free(*cHandles.nvml, C.int(gpu.index), &memInfo.free, &memInfo.total, &memInfo.used)
			} else if cHandles.cudart != nil {
				C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
			} else if cHandles.nvcuda != nil {
				C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
				memInfo.used = memInfo.total - memInfo.free
			} else {
				// shouldn't happen
				slog.Warn("no valid cuda library loaded to refresh vram usage")
				break
			}
			if memInfo.err != nil {
				slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
				C.free(unsafe.Pointer(memInfo.err))
				continue
			}
			if memInfo.free == 0 {
				slog.Warn("error looking up nvidia GPU memory")
				continue
			}
			if cHandles.nvml != nil && gpu.OSOverhead > 0 {
				// When using the management library update based on recorded overhead
				memInfo.free -= C.uint64_t(gpu.OSOverhead)
			}
			slog.Debug("updating cuda memory data",
				"gpu", gpu.ID,
				"name", gpu.Name,
				"overhead", format.HumanBytes2(gpu.OSOverhead),
				slog.Group(
					"before",
					"total", format.HumanBytes2(gpu.TotalMemory),
					"free", format.HumanBytes2(gpu.FreeMemory),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(uint64(memInfo.total)),
					"free", format.HumanBytes2(uint64(memInfo.free)),
					"used", format.HumanBytes2(uint64(memInfo.used)),
				),
			)
			cudaGPUs[i].FreeMemory = uint64(memInfo.free)
		}

		if oHandles == nil && len(oneapiGPUs) > 0 {
			oHandles = initOneAPIHandles()
		}
		for i, gpu := range oneapiGPUs {
			if oHandles.oneapi == nil {
				// shouldn't happen
				slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
				continue
			}
			C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
			// TODO - convert this to MinimumMemory based on testing...
			var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
			memInfo.free = C.uint64_t(totalFreeMem)
			oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
		}

		err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
		if err != nil {
			slog.Debug("problem refreshing ROCm free memory", "error", err)
		}
	}

	resp := []GpuInfo{}
	for _, gpu := range cudaGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range rocmGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range oneapiGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	if len(resp) == 0 {
		resp = append(resp, cpus[0].GpuInfo)
	}
	return resp
}

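// FindGPULibs returns candidate paths for the named GPU library, searching the bundled dependency directory, PATH/LD_LIBRARY_PATH, and the supplied default glob patterns, with symlinks resolved and duplicates removed.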
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
	var ldPaths []string
	gpuLibPaths := []string{}
	slog.Debug("Searching for GPU library", "name", baseLibName)

	// Start with our bundled libraries
	patterns := []string{filepath.Join(GetDepDir(), baseLibName)}

	switch runtime.GOOS {
	case "windows":
		ldPaths = strings.Split(os.Getenv("PATH"), ";")
	case "linux":
		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
	default:
		return gpuLibPaths
	}

	// Then with whatever we find in the PATH/LD_LIBRARY_PATH
	for _, ldPath := range ldPaths {
		d, err := filepath.Abs(ldPath)
		if err != nil {
			continue
		}
		patterns = append(patterns, filepath.Join(d, baseLibName))
	}
	patterns = append(patterns, defaultPatterns...)
	slog.Debug("gpu library search", "globs", patterns)
	for _, pattern := range patterns {
		// Nvidia PhysX known to return bogus results
		if strings.Contains(pattern, "PhysX") {
			slog.Debug("skipping PhysX cuda library path", "path", pattern)
			continue
		}
		// Ignore glob discovery errors
		matches, _ := filepath.Glob(pattern)
		for _, match := range matches {
			// Resolve any links so we don't try the same lib multiple times
			// and weed out any dups across globs
			libPath := match
			tmp := match
			var err error
			for ; err == nil; tmp, err = os.Readlink(libPath) {
				if !filepath.IsAbs(tmp) {
					tmp = filepath.Join(filepath.Dir(libPath), tmp)
				}
				libPath = tmp
			}
			new := true
			for _, cmp := range gpuLibPaths {
				if cmp == libPath {
					new = false
					break
				}
			}
			if new {
				gpuLibPaths = append(gpuLibPaths, libPath)
			}
		}
	}
	slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
	return gpuLibPaths
}

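// LoadCUDARTMgmt tries each candidate CUDA runtime library path in turn and returns the device count, handle, and path of the first one that initializes.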
func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
	var resp C.cudart_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range cudartLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.cudart_init(lib, &resp)
		if resp.err != nil {
			slog.Debug("Unable to load cudart", "library", libPath, "error", C.GoString(resp.err))
			C.free(unsafe.Pointer(resp.err))
		} else {
			return int(resp.num_devices), &resp.ch, libPath
		}
	}
	return 0, nil, ""
}

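// LoadNVCUDAMgmt tries each candidate CUDA driver library path in turn, logging initialization failures at a level appropriate to the error, and returns the device count, handle, and path of the first one that initializes.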
func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
	var resp C.nvcuda_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvcudaLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.nvcuda_init(lib, &resp)
		if resp.err != nil {
			// Decide what log level based on the type of error message to help users understand why
			msg := C.GoString(resp.err)
			switch resp.cudaErr {
			case C.CUDA_ERROR_INSUFFICIENT_DRIVER, C.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH:
				slog.Warn("version mismatch between driver and cuda driver library - reboot or upgrade may be required", "library", libPath, "error", msg)
			case C.CUDA_ERROR_NO_DEVICE:
				slog.Info("no nvidia devices detected", "library", libPath)
			case C.CUDA_ERROR_UNKNOWN:
				slog.Warn("unknown error initializing cuda driver library", "library", libPath, "error", msg)
				slog.Warn("see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information")
			default:
				if strings.Contains(msg, "wrong ELF class") {
					slog.Debug("skipping 32bit library", "library", libPath)
				} else {
					slog.Info("unable to load cuda driver library", "library", libPath, "error", msg)
				}
			}
			C.free(unsafe.Pointer(resp.err))
		} else {
			return int(resp.num_devices), &resp.ch, libPath
		}
	}
	return 0, nil, ""
}

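// LoadNVMLMgmt tries each candidate NVML library path in turn and returns the handle and path of the first one that initializes.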
func LoadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string) {
	var resp C.nvml_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvmlLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.nvml_init(lib, &resp)
		if resp.err != nil {
			slog.Info(fmt.Sprintf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err)))
			C.free(unsafe.Pointer(resp.err))
		} else {
			return &resp.ch, libPath
		}
	}
	return nil, ""
}

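// LoadOneapiMgmt tries each candidate oneAPI library path in turn and returns the total device count across drivers, the handle, and the path of the first one that initializes.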
func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
	var resp C.oneapi_init_resp_t
	num_devices := 0
	resp.oh.verbose = getVerboseState()
	for _, libPath := range oneapiLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.oneapi_init(lib, &resp)
		if resp.err != nil {
			slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err))
			C.free(unsafe.Pointer(resp.err))
		} else {
			for i := range resp.oh.num_drivers {
				num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
			}
			return num_devices, &resp.oh, libPath
		}
	}
	return 0, nil, ""
}

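// getVerboseState maps the debug setting from envconfig onto the verbose flag expected by the native discovery code.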
func getVerboseState() C.uint16_t {
	if envconfig.Debug() {
		return C.uint16_t(1)
	}
	return C.uint16_t(0)
}

// Given the list of GPUs this instantiation is targeted for,
// figure out the visible devices environment variable
//
// If different libraries are detected, the first one is what we use
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
	if len(l) == 0 {
		return "", ""
	}
	switch l[0].Library {
	case "cuda":
		return cudaGetVisibleDevicesEnv(l)
	case "rocm":
		return rocmGetVisibleDevicesEnv(l)
	case "oneapi":
		return oneapiGetVisibleDevicesEnv(l)
	default:
		slog.Debug("no filter required for library " + l[0].Library)
		return "", ""
	}
}

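// GetDepDir locates the directory holding the bundled GPU dependency libraries, checking next to the executable and the working directory (including the dist/ layouts used for local development builds).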
func GetDepDir() string {
	// On Windows/linux we bundle the dependencies at the same level as the executable
	appExe, err := os.Executable()
	if err != nil {
		slog.Warn("failed to lookup executable path", "error", err)
	}
	cwd, err := os.Getwd()
	if err != nil {
		slog.Warn("failed to lookup working directory", "error", err)
	}
	// Scan for any of our dependencies, and pick first match
	for _, root := range []string{filepath.Dir(appExe), cwd} {
		libDep := "ollama_libs"
		if _, err := os.Stat(filepath.Join(root, libDep)); err == nil {
			return filepath.Join(root, libDep)
		}
		// Developer mode, local build
		if _, err := os.Stat(filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil {
			return filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)
		}
		if _, err := os.Stat(filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil {
			return filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)
		}
	}
	slog.Warn("unable to locate gpu dependency libraries")
	return ""
}