gpu.go

//go:build linux || windows

package discover

/*
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -lpthread
#include "gpu_info.h"
*/
import "C"

import (
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
	"runtime"
	"strings"
	"sync"
	"unsafe"

	"github.com/ollama/ollama/envconfig"
	"github.com/ollama/ollama/format"
)
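
// cudaHandles holds handles to the NVIDIA discovery libraries (NVML
// management, CUDA driver, and CUDA runtime); only the libraries that were
// successfully loaded have non-nil handles.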
type cudaHandles struct {
	deviceCount int
	cudart      *C.cudart_handle_t
	nvcuda      *C.nvcuda_handle_t
	nvml        *C.nvml_handle_t
}
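
// oneapiHandles holds the Intel oneAPI management library handle and the
// total device count across all of its drivers.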
type oneapiHandles struct {
	oneapi      *C.oneapi_handle_t
	deviceCount int
}

const (
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
	// TODO OneAPI minimum memory
)

var (
	gpuMutex      sync.Mutex
	bootstrapped  bool
	cpuCapability CPUCapability
	cpus          []CPUInfo
	cudaGPUs      []CudaGPUInfo
	nvcudaLibPath string
	cudartLibPath string
	oneapiLibPath string
	nvmlLibPath   string
	rocmGPUs      []RocmGPUInfo
	oneapiGPUs    []OneapiGPUInfo

	// If any discovered GPUs are incompatible, report why
	unsupportedGPUs []UnsupportedGPUInfo

	// Keep track of errors during bootstrapping so that if GPUs that were
	// expected to be present are missing, this may explain why
	bootstrapErrors []error
)

// With our current CUDA compile flags, older than 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0}

var RocmComputeMin = 9

// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU

// Note: gpuMutex must already be held
func initCudaHandles() *cudaHandles {
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing

	cHandles := &cudaHandles{}
	// Short Circuit if we already know which library to use
	// ignore bootstrap errors in this case since we already recorded them
	if nvmlLibPath != "" {
		cHandles.nvml, _, _ = loadNVMLMgmt([]string{nvmlLibPath})
		return cHandles
	}
	if nvcudaLibPath != "" {
		cHandles.deviceCount, cHandles.nvcuda, _, _ = loadNVCUDAMgmt([]string{nvcudaLibPath})
		return cHandles
	}
	if cudartLibPath != "" {
		cHandles.deviceCount, cHandles.cudart, _, _ = loadCUDARTMgmt([]string{cudartLibPath})
		return cHandles
	}

	slog.Debug("searching for GPU discovery libraries for NVIDIA")
	var cudartMgmtPatterns []string

	// Aligned with driver, we can't carry as payloads
	nvcudaMgmtPatterns := NvcudaGlobs

	if runtime.GOOS == "windows" {
		localAppData := os.Getenv("LOCALAPPDATA")
		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", CudartMgmtName)}
	}
	libDir := LibraryDir()
	if libDir != "" {
		cudartMgmtPatterns = []string{filepath.Join(libDir, CudartMgmtName)}
	}
	cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)

	if len(NvmlGlobs) > 0 {
		nvmlLibPaths := FindGPULibs(NvmlMgmtName, NvmlGlobs)
		if len(nvmlLibPaths) > 0 {
			nvml, libPath, err := loadNVMLMgmt(nvmlLibPaths)
			if nvml != nil {
				slog.Debug("nvidia-ml loaded", "library", libPath)
				cHandles.nvml = nvml
				nvmlLibPath = libPath
			}
			if err != nil {
				bootstrapErrors = append(bootstrapErrors, err)
			}
		}
	}

	nvcudaLibPaths := FindGPULibs(NvcudaMgmtName, nvcudaMgmtPatterns)
	if len(nvcudaLibPaths) > 0 {
		deviceCount, nvcuda, libPath, err := loadNVCUDAMgmt(nvcudaLibPaths)
		if nvcuda != nil {
			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
			cHandles.nvcuda = nvcuda
			cHandles.deviceCount = deviceCount
			nvcudaLibPath = libPath
			return cHandles
		}
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
	}

	cudartLibPaths := FindGPULibs(CudartMgmtName, cudartMgmtPatterns)
	if len(cudartLibPaths) > 0 {
		deviceCount, cudart, libPath, err := loadCUDARTMgmt(cudartLibPaths)
		if cudart != nil {
			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
			cHandles.cudart = cudart
			cHandles.deviceCount = deviceCount
			cudartLibPath = libPath
			return cHandles
		}
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
	}

	return cHandles
}

// Note: gpuMutex must already be held
func initOneAPIHandles() *oneapiHandles {
	oHandles := &oneapiHandles{}

	// Short Circuit if we already know which library to use
	// ignore bootstrap errors in this case since we already recorded them
	if oneapiLibPath != "" {
		oHandles.deviceCount, oHandles.oneapi, _, _ = loadOneapiMgmt([]string{oneapiLibPath})
		return oHandles
	}

	oneapiLibPaths := FindGPULibs(OneapiMgmtName, OneapiGlobs)
	if len(oneapiLibPaths) > 0 {
		var err error
		oHandles.deviceCount, oHandles.oneapi, oneapiLibPath, err = loadOneapiMgmt(oneapiLibPaths)
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
	}

	return oHandles
}
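
// GetCPUInfo returns the CPU described as a GpuInfoList entry, triggering a
// full GetGPUInfo bootstrap first if discovery has not run yet.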
func GetCPUInfo() GpuInfoList {
	gpuMutex.Lock()
	if !bootstrapped {
		gpuMutex.Unlock()
		GetGPUInfo()
	} else {
		gpuMutex.Unlock()
	}
	return GpuInfoList{cpus[0].GpuInfo}
}
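
// GetGPUInfo returns the GPUs usable for inference, falling back to a single
// CPU entry when no compatible GPU is discovered. The first call bootstraps
// discovery (loading vendor libraries and enumerating devices); later calls
// only refresh free memory for the devices found earlier. Illustrative use
// from another package:
//
//	gpus := discover.GetGPUInfo()
//	for _, gpu := range gpus {
//		fmt.Println(gpu.Library, gpu.ID, format.HumanBytes2(gpu.FreeMemory))
//	}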
func GetGPUInfo() GpuInfoList {
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()

	needRefresh := true
	var cHandles *cudaHandles
	var oHandles *oneapiHandles
	defer func() {
		if cHandles != nil {
			if cHandles.cudart != nil {
				C.cudart_release(*cHandles.cudart)
			}
			if cHandles.nvcuda != nil {
				C.nvcuda_release(*cHandles.nvcuda)
			}
			if cHandles.nvml != nil {
				C.nvml_release(*cHandles.nvml)
			}
		}
		if oHandles != nil {
			if oHandles.oneapi != nil {
				// TODO - is this needed?
				C.oneapi_release(*oHandles.oneapi)
			}
		}
	}()

	if !bootstrapped {
		slog.Info("looking for compatible GPUs")
		bootstrapErrors = []error{}
		needRefresh = false
		cpuCapability = GetCPUCapability()
		var memInfo C.mem_info_t

		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		}
		depPath := LibraryDir()
		details, err := GetCPUDetails()
		if err != nil {
			slog.Warn("failed to lookup CPU details", "error", err)
		}
		cpus = []CPUInfo{
			{
				GpuInfo: GpuInfo{
					memInfo:        mem,
					Library:        "cpu",
					Variant:        cpuCapability.String(),
					ID:             "0",
					DependencyPath: []string{depPath},
				},
				CPUs: details,
			},
		}

		// Fallback to CPU mode if we're lacking required vector extensions on x86
		if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
			err := fmt.Errorf("CPU does not have minimum vector extensions, GPU inference disabled. Required:%s Detected:%s", GPURunnerCPUCapability, cpuCapability)
			slog.Warn(err.Error())
			bootstrapErrors = append(bootstrapErrors, err)
			bootstrapped = true
			// No need to do any GPU discovery, since we can't run on them
			return GpuInfoList{cpus[0].GpuInfo}
		}

		// Load ALL libraries
		cHandles = initCudaHandles()

		// NVIDIA
		for i := range cHandles.deviceCount {
			if cHandles.cudart != nil || cHandles.nvcuda != nil {
				gpuInfo := CudaGPUInfo{
					GpuInfo: GpuInfo{
						Library: "cuda",
					},
					index: i,
				}
				var driverMajor int
				var driverMinor int
				if cHandles.cudart != nil {
					C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
				} else {
					C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
					driverMajor = int(cHandles.nvcuda.driver_major)
					driverMinor = int(cHandles.nvcuda.driver_minor)
				}
				if memInfo.err != nil {
					slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
					C.free(unsafe.Pointer(memInfo.err))
					continue
				}
				gpuInfo.TotalMemory = uint64(memInfo.total)
				gpuInfo.FreeMemory = uint64(memInfo.free)
				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
				gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
				gpuInfo.computeMajor = int(memInfo.major)
				gpuInfo.computeMinor = int(memInfo.minor)
				gpuInfo.MinimumMemory = cudaMinimumMemory
				gpuInfo.DriverMajor = driverMajor
				gpuInfo.DriverMinor = driverMinor
				variant := cudaVariant(gpuInfo)
				if depPath != "" {
					gpuInfo.DependencyPath = []string{depPath}
					// Check for variant specific directory
					if variant != "" {
						if _, err := os.Stat(filepath.Join(depPath, "cuda_"+variant)); err == nil {
							gpuInfo.DependencyPath = []string{filepath.Join(depPath, "cuda_"+variant), depPath}
						}
					}
				}
				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
				gpuInfo.Variant = variant

				if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
					unsupportedGPUs = append(unsupportedGPUs,
						UnsupportedGPUInfo{
							GpuInfo: gpuInfo.GpuInfo,
						})
					slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
					continue
				}

				// query the management library as well so we can record any skew between the two
				// which represents overhead on the GPU we must set aside on subsequent updates
				if cHandles.nvml != nil {
					uuid := C.CString(gpuInfo.ID)
					defer C.free(unsafe.Pointer(uuid))
					C.nvml_get_free(*cHandles.nvml, uuid, &memInfo.free, &memInfo.total, &memInfo.used)
					if memInfo.err != nil {
						slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
						C.free(unsafe.Pointer(memInfo.err))
					} else {
						if memInfo.free != 0 && uint64(memInfo.free) > gpuInfo.FreeMemory {
							gpuInfo.OSOverhead = uint64(memInfo.free) - gpuInfo.FreeMemory
							slog.Info("detected OS VRAM overhead",
								"id", gpuInfo.ID,
								"library", gpuInfo.Library,
								"compute", gpuInfo.Compute,
								"driver", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor),
								"name", gpuInfo.Name,
								"overhead", format.HumanBytes2(gpuInfo.OSOverhead),
							)
						}
					}
				}

				// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
				cudaGPUs = append(cudaGPUs, gpuInfo)
			}
		}

		// Intel
		if envconfig.IntelGPU() {
			oHandles = initOneAPIHandles()
			if oHandles != nil && oHandles.oneapi != nil {
				for d := range oHandles.oneapi.num_drivers {
					if oHandles.oneapi == nil {
						// shouldn't happen
						slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
						continue
					}
					devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
					for i := range devCount {
						gpuInfo := OneapiGPUInfo{
							GpuInfo: GpuInfo{
								Library: "oneapi",
							},
							driverIndex: int(d),
							gpuIndex:    int(i),
						}
						// TODO - split bootstrapping from updating free memory
						C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
						// TODO - convert this to MinimumMemory based on testing...
						var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
						memInfo.free = C.uint64_t(totalFreeMem)
						gpuInfo.TotalMemory = uint64(memInfo.total)
						gpuInfo.FreeMemory = uint64(memInfo.free)
						gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
						gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
						gpuInfo.DependencyPath = []string{depPath}
						oneapiGPUs = append(oneapiGPUs, gpuInfo)
					}
				}
			}
		}

		rocmGPUs, err = AMDGetGPUInfo()
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
		bootstrapped = true
		if len(cudaGPUs) == 0 && len(rocmGPUs) == 0 && len(oneapiGPUs) == 0 {
			slog.Info("no compatible GPUs were discovered")
		}
	}

	// For detected GPUs, load library if not loaded
	// Refresh free memory usage
	if needRefresh {
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		} else {
			slog.Debug("updating system memory data",
				slog.Group(
					"before",
					"total", format.HumanBytes2(cpus[0].TotalMemory),
					"free", format.HumanBytes2(cpus[0].FreeMemory),
					"free_swap", format.HumanBytes2(cpus[0].FreeSwap),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(mem.TotalMemory),
					"free", format.HumanBytes2(mem.FreeMemory),
					"free_swap", format.HumanBytes2(mem.FreeSwap),
				),
			)
			cpus[0].FreeMemory = mem.FreeMemory
			cpus[0].FreeSwap = mem.FreeSwap
		}

		var memInfo C.mem_info_t
		if cHandles == nil && len(cudaGPUs) > 0 {
			cHandles = initCudaHandles()
		}
		for i, gpu := range cudaGPUs {
			if cHandles.nvml != nil {
				uuid := C.CString(gpu.ID)
				defer C.free(unsafe.Pointer(uuid))
				C.nvml_get_free(*cHandles.nvml, uuid, &memInfo.free, &memInfo.total, &memInfo.used)
			} else if cHandles.cudart != nil {
				C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
			} else if cHandles.nvcuda != nil {
				C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
				memInfo.used = memInfo.total - memInfo.free
			} else {
				// shouldn't happen
				slog.Warn("no valid cuda library loaded to refresh vram usage")
				break
			}
			if memInfo.err != nil {
				slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
				C.free(unsafe.Pointer(memInfo.err))
				continue
			}
			if memInfo.free == 0 {
				slog.Warn("error looking up nvidia GPU memory")
				continue
			}
			if cHandles.nvml != nil && gpu.OSOverhead > 0 {
				// When using the management library update based on recorded overhead
				memInfo.free -= C.uint64_t(gpu.OSOverhead)
			}
			slog.Debug("updating cuda memory data",
				"gpu", gpu.ID,
				"name", gpu.Name,
				"overhead", format.HumanBytes2(gpu.OSOverhead),
				slog.Group(
					"before",
					"total", format.HumanBytes2(gpu.TotalMemory),
					"free", format.HumanBytes2(gpu.FreeMemory),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(uint64(memInfo.total)),
					"free", format.HumanBytes2(uint64(memInfo.free)),
					"used", format.HumanBytes2(uint64(memInfo.used)),
				),
			)
			cudaGPUs[i].FreeMemory = uint64(memInfo.free)
		}

		if oHandles == nil && len(oneapiGPUs) > 0 {
			oHandles = initOneAPIHandles()
		}
		for i, gpu := range oneapiGPUs {
			if oHandles.oneapi == nil {
				// shouldn't happen
				slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
				continue
			}
			C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
			// TODO - convert this to MinimumMemory based on testing...
			var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
			memInfo.free = C.uint64_t(totalFreeMem)
			oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
		}

		err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
		if err != nil {
			slog.Debug("problem refreshing ROCm free memory", "error", err)
		}
	}
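
	// Assemble the response, one entry per discovered GPU across all vendors;
	// if nothing usable was found, fall back to the single CPU entry.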
	resp := []GpuInfo{}
	for _, gpu := range cudaGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range rocmGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range oneapiGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	if len(resp) == 0 {
		resp = append(resp, cpus[0].GpuInfo)
	}
	return resp
}
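
// FindGPULibs searches for candidate copies of the named GPU library, checking
// the bundled library directory, the system library search path, and the
// supplied default glob patterns, and returns the de-duplicated matches.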
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
	var ldPaths []string
	gpuLibPaths := []string{}
	slog.Debug("Searching for GPU library", "name", baseLibName)

	// Start with our bundled libraries
	patterns := []string{filepath.Join(LibraryDir(), baseLibName)}

	switch runtime.GOOS {
	case "windows":
		ldPaths = strings.Split(os.Getenv("PATH"), ";")
	case "linux":
		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
	default:
		return gpuLibPaths
	}

	// Then with whatever we find in the PATH/LD_LIBRARY_PATH
	for _, ldPath := range ldPaths {
		d, err := filepath.Abs(ldPath)
		if err != nil {
			continue
		}
		patterns = append(patterns, filepath.Join(d, baseLibName))
	}
	patterns = append(patterns, defaultPatterns...)
	slog.Debug("gpu library search", "globs", patterns)
	for _, pattern := range patterns {
		// Nvidia PhysX known to return bogus results
		if strings.Contains(pattern, "PhysX") {
			slog.Debug("skipping PhysX cuda library path", "path", pattern)
			continue
		}
		// Ignore glob discovery errors
		matches, _ := filepath.Glob(pattern)
		for _, match := range matches {
			// Resolve any links so we don't try the same lib multiple times
			// and weed out any dups across globs
			libPath := match
			tmp := match
			var err error
			for ; err == nil; tmp, err = os.Readlink(libPath) {
				if !filepath.IsAbs(tmp) {
					tmp = filepath.Join(filepath.Dir(libPath), tmp)
				}
				libPath = tmp
			}
			new := true
			for _, cmp := range gpuLibPaths {
				if cmp == libPath {
					new = false
					break
				}
			}
			if new {
				gpuLibPaths = append(gpuLibPaths, libPath)
			}
		}
	}
	slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
	return gpuLibPaths
}

// Bootstrap the runtime library
// Returns: num devices, handle, libPath, error
func loadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string, error) {
	var resp C.cudart_init_resp_t
	resp.ch.verbose = getVerboseState()
	var err error
	for _, libPath := range cudartLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.cudart_init(lib, &resp)
		if resp.err != nil {
			err = fmt.Errorf("Unable to load cudart library %s: %s", libPath, C.GoString(resp.err))
			slog.Debug(err.Error())
			C.free(unsafe.Pointer(resp.err))
		} else {
			err = nil
			return int(resp.num_devices), &resp.ch, libPath, err
		}
	}
	return 0, nil, "", err
}

// Bootstrap the driver library
// Returns: num devices, handle, libPath, error
func loadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string, error) {
	var resp C.nvcuda_init_resp_t
	resp.ch.verbose = getVerboseState()
	var err error
	for _, libPath := range nvcudaLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.nvcuda_init(lib, &resp)
		if resp.err != nil {
			// Decide what log level based on the type of error message to help users understand why
			switch resp.cudaErr {
			case C.CUDA_ERROR_INSUFFICIENT_DRIVER, C.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH:
				err = fmt.Errorf("version mismatch between driver and cuda driver library - reboot or upgrade may be required: library %s", libPath)
				slog.Warn(err.Error())
			case C.CUDA_ERROR_NO_DEVICE:
				err = fmt.Errorf("no nvidia devices detected by library %s", libPath)
				slog.Info(err.Error())
			case C.CUDA_ERROR_UNKNOWN:
				err = fmt.Errorf("unknown error initializing cuda driver library %s: %s. see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information", libPath, C.GoString(resp.err))
				slog.Warn(err.Error())
			default:
				msg := C.GoString(resp.err)
				if strings.Contains(msg, "wrong ELF class") {
					slog.Debug("skipping 32bit library", "library", libPath)
				} else {
					err = fmt.Errorf("Unable to load cuda driver library %s: %s", libPath, C.GoString(resp.err))
					slog.Info(err.Error())
				}
			}
			C.free(unsafe.Pointer(resp.err))
		} else {
			err = nil
			return int(resp.num_devices), &resp.ch, libPath, err
		}
	}
	return 0, nil, "", err
}

// Bootstrap the management library
// Returns: handle, libPath, error
func loadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string, error) {
	var resp C.nvml_init_resp_t
	resp.ch.verbose = getVerboseState()
	var err error
	for _, libPath := range nvmlLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.nvml_init(lib, &resp)
		if resp.err != nil {
			err = fmt.Errorf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err))
			slog.Info(err.Error())
			C.free(unsafe.Pointer(resp.err))
		} else {
			err = nil
			return &resp.ch, libPath, err
		}
	}
	return nil, "", err
}

// bootstrap the Intel GPU library
// Returns: num devices, handle, libPath, error
func loadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string, error) {
	var resp C.oneapi_init_resp_t
	num_devices := 0
	resp.oh.verbose = getVerboseState()
	var err error
	for _, libPath := range oneapiLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.oneapi_init(lib, &resp)
		if resp.err != nil {
			err = fmt.Errorf("Unable to load oneAPI management library %s: %s", libPath, C.GoString(resp.err))
			slog.Debug(err.Error())
			C.free(unsafe.Pointer(resp.err))
		} else {
			err = nil
			for i := range resp.oh.num_drivers {
				num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
			}
			return num_devices, &resp.oh, libPath, err
		}
	}
	return 0, nil, "", err
}
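
// getVerboseState translates the envconfig debug setting into the C verbose
// flag passed to the discovery libraries.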
func getVerboseState() C.uint16_t {
	if envconfig.Debug() {
		return C.uint16_t(1)
	}
	return C.uint16_t(0)
}

// Given the list of GPUs this instantiation is targeted for,
// figure out the visible devices environment variable
//
// If different libraries are detected, the first one is what we use
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
	if len(l) == 0 {
		return "", ""
	}
	switch l[0].Library {
	case "cuda":
		return cudaGetVisibleDevicesEnv(l)
	case "rocm":
		return rocmGetVisibleDevicesEnv(l)
	case "oneapi":
		return oneapiGetVisibleDevicesEnv(l)
	default:
		slog.Debug("no filter required for library " + l[0].Library)
		return "", ""
	}
}
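
// LibraryDir returns the directory holding Ollama's bundled GPU dependency
// libraries, or "" if no suitable directory can be located.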
func LibraryDir() string {
	// On Windows/linux we bundle the dependencies at the same level as the executable
	appExe, err := os.Executable()
	if err != nil {
		slog.Warn("failed to lookup executable path", "error", err)
	}
	cwd, err := os.Getwd()
	if err != nil {
		slog.Warn("failed to lookup working directory", "error", err)
	}
	// Scan for any of our dependencies, and pick first match
	for _, root := range []string{filepath.Dir(appExe), filepath.Join(filepath.Dir(appExe), envconfig.LibRelativeToExe()), cwd} {
		libDep := filepath.Join("lib", "ollama")
		if _, err := os.Stat(filepath.Join(root, libDep)); err == nil {
			return filepath.Join(root, libDep)
		}
		// Developer mode, local build
		if _, err := os.Stat(filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil {
			return filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)
		}
		if _, err := os.Stat(filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil {
			return filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)
		}
	}
	slog.Warn("unable to locate gpu dependency libraries")
	return ""
}
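
// GetSystemInfo gathers the CPU description, discovered GPUs, any GPUs that
// were rejected as unsupported, and the errors recorded during discovery into
// a single SystemInfo value.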
func GetSystemInfo() SystemInfo {
	gpus := GetGPUInfo()
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
	discoveryErrors := []string{}
	for _, err := range bootstrapErrors {
		discoveryErrors = append(discoveryErrors, err.Error())
	}
	if len(gpus) == 1 && gpus[0].Library == "cpu" {
		gpus = []GpuInfo{}
	}

	return SystemInfo{
		System:          cpus[0],
		GPUs:            gpus,
		UnsupportedGPUs: unsupportedGPUs,
		DiscoveryErrors: discoveryErrors,
	}
}