payload_common.go 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. package llm
  2. import (
  3. "compress/gzip"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "io/fs"
  8. "log/slog"
  9. "os"
  10. "path/filepath"
  11. "runtime"
  12. "strings"
  13. "sync"
  14. "golang.org/x/exp/slices"
  15. "golang.org/x/sync/errgroup"
  16. "github.com/ollama/ollama/gpu"
  17. )
// Library names may contain an optional variant separated by '_'
// For example, "rocm_v6" and "rocm_v5" or "cpu" and "cpu_avx2"
// Any library without a variant is the lowest common denominator
//
// availableDynLibs maps each library name (the name of the directory the
// library was extracted into) to the path of the extracted library file.
// It is populated by nativeInit.
var availableDynLibs = map[string]string{}

// pathComponentCount is the number of "/"-separated components expected in an
// embedded payload path: llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY
const pathComponentCount = 7
  23. // getDynLibs returns an ordered list of LLM libraries to try, starting with the best
  24. func getDynLibs(gpuInfo gpu.GpuInfo) []string {
  25. // Short circuit if we know we're using the default built-in (darwin only)
  26. if gpuInfo.Library == "default" {
  27. return []string{"default"}
  28. }
  29. // TODO - temporary until we have multiple CPU variations for Darwin
  30. // Short circuit on darwin with metal only
  31. if len(availableDynLibs) == 1 {
  32. if _, onlyMetal := availableDynLibs["metal"]; onlyMetal {
  33. return []string{availableDynLibs["metal"]}
  34. }
  35. }
  36. exactMatch := ""
  37. dynLibs := []string{}
  38. altDynLibs := []string{}
  39. requested := gpuInfo.Library
  40. if gpuInfo.Variant != "" {
  41. requested += "_" + gpuInfo.Variant
  42. }
  43. // Try to find an exact match
  44. for cmp := range availableDynLibs {
  45. if requested == cmp {
  46. exactMatch = cmp
  47. dynLibs = []string{availableDynLibs[cmp]}
  48. break
  49. }
  50. }
  51. // Then for GPUs load alternates and sort the list for consistent load ordering
  52. if gpuInfo.Library != "cpu" {
  53. for cmp := range availableDynLibs {
  54. if gpuInfo.Library == strings.Split(cmp, "_")[0] && cmp != exactMatch {
  55. altDynLibs = append(altDynLibs, cmp)
  56. }
  57. }
  58. slices.Sort(altDynLibs)
  59. for _, altDynLib := range altDynLibs {
  60. dynLibs = append(dynLibs, availableDynLibs[altDynLib])
  61. }
  62. }
  63. // Load up the best CPU variant if not primary requested
  64. if gpuInfo.Library != "cpu" {
  65. variant := gpu.GetCPUVariant()
  66. // If no variant, then we fall back to default
  67. // If we have a variant, try that if we find an exact match
  68. // Attempting to run the wrong CPU instructions will panic the
  69. // process
  70. if variant != "" {
  71. for cmp := range availableDynLibs {
  72. if cmp == "cpu_"+variant {
  73. dynLibs = append(dynLibs, availableDynLibs[cmp])
  74. break
  75. }
  76. }
  77. } else {
  78. dynLibs = append(dynLibs, availableDynLibs["cpu"])
  79. }
  80. }
  81. // Finally, if we didn't find any matches, LCD CPU FTW
  82. if len(dynLibs) == 0 {
  83. dynLibs = []string{availableDynLibs["cpu"]}
  84. }
  85. slog.Debug(fmt.Sprintf("ordered list of LLM libraries to try %v", dynLibs))
  86. return dynLibs
  87. }
  88. func rocmDynLibPresent() bool {
  89. for dynLibName := range availableDynLibs {
  90. if strings.HasPrefix(dynLibName, "rocm") {
  91. return true
  92. }
  93. }
  94. return false
  95. }
  96. func nativeInit() error {
  97. payloadsDir, err := gpu.PayloadsDir()
  98. if err != nil {
  99. return err
  100. }
  101. slog.Info(fmt.Sprintf("Extracting dynamic libraries to %s ...", payloadsDir))
  102. libs, err := extractDynamicLibs(payloadsDir, "llama.cpp/build/*/*/*/lib/*")
  103. if err != nil {
  104. if errors.Is(err, payloadMissing) {
  105. slog.Info(fmt.Sprintf("%s", payloadMissing))
  106. return nil
  107. }
  108. return err
  109. }
  110. for _, lib := range libs {
  111. // The last dir component is the variant name
  112. variant := filepath.Base(filepath.Dir(lib))
  113. availableDynLibs[variant] = lib
  114. }
  115. if err := verifyDriverAccess(); err != nil {
  116. return err
  117. }
  118. // Report which dynamic libraries we have loaded to assist troubleshooting
  119. variants := make([]string, len(availableDynLibs))
  120. i := 0
  121. for variant := range availableDynLibs {
  122. variants[i] = variant
  123. i++
  124. }
  125. slog.Info(fmt.Sprintf("Dynamic LLM libraries %v", variants))
  126. slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY")
  127. return nil
  128. }
  129. func extractDynamicLibs(payloadsDir, glob string) ([]string, error) {
  130. files, err := fs.Glob(libEmbed, glob)
  131. if err != nil || len(files) == 0 {
  132. return nil, payloadMissing
  133. }
  134. var mu sync.Mutex
  135. var libs []string
  136. var g errgroup.Group
  137. for _, file := range files {
  138. pathComps := strings.Split(file, "/")
  139. if len(pathComps) != pathComponentCount {
  140. slog.Error(fmt.Sprintf("unexpected payload components: %v", pathComps))
  141. continue
  142. }
  143. file := file
  144. g.Go(func() error {
  145. // llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY
  146. // Include the variant in the path to avoid conflicts between multiple server libs
  147. targetDir := filepath.Join(payloadsDir, pathComps[pathComponentCount-3])
  148. srcFile, err := libEmbed.Open(file)
  149. if err != nil {
  150. return fmt.Errorf("read payload %s: %v", file, err)
  151. }
  152. defer srcFile.Close()
  153. if err := os.MkdirAll(targetDir, 0o755); err != nil {
  154. return fmt.Errorf("create payload lib dir %s: %v", payloadsDir, err)
  155. }
  156. src := io.Reader(srcFile)
  157. filename := file
  158. if strings.HasSuffix(file, ".gz") {
  159. src, err = gzip.NewReader(src)
  160. if err != nil {
  161. return fmt.Errorf("decompress payload %s: %v", file, err)
  162. }
  163. filename = strings.TrimSuffix(filename, ".gz")
  164. }
  165. destFile := filepath.Join(targetDir, filepath.Base(filename))
  166. if strings.Contains(destFile, "server") {
  167. mu.Lock()
  168. libs = append(libs, destFile)
  169. mu.Unlock()
  170. }
  171. destFp, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
  172. if err != nil {
  173. return fmt.Errorf("write payload %s: %v", file, err)
  174. }
  175. defer destFp.Close()
  176. if _, err := io.Copy(destFp, src); err != nil {
  177. return fmt.Errorf("copy payload %s: %v", file, err)
  178. }
  179. return nil
  180. })
  181. }
  182. err = g.Wait()
  183. if err != nil {
  184. // If we fail to extract, the payload dir is unusable, so cleanup whatever we extracted
  185. gpu.Cleanup()
  186. return nil, err
  187. }
  188. return libs, nil
  189. }
  190. func verifyDriverAccess() error {
  191. if runtime.GOOS != "linux" {
  192. return nil
  193. }
  194. // Only check ROCm access if we have the dynamic lib loaded
  195. if rocmDynLibPresent() {
  196. // Verify we have permissions - either running as root, or we have group access to the driver
  197. fd, err := os.OpenFile("/dev/kfd", os.O_RDWR, 0666)
  198. if err != nil {
  199. if errors.Is(err, fs.ErrPermission) {
  200. return fmt.Errorf("Radeon card detected, but permissions not set up properly. Either run ollama as root, or add you user account to the render group.")
  201. } else if errors.Is(err, fs.ErrNotExist) {
  202. // expected behavior without a radeon card
  203. return nil
  204. }
  205. return fmt.Errorf("failed to check permission on /dev/kfd: %w", err)
  206. }
  207. fd.Close()
  208. }
  209. return nil
  210. }