payload_common.go 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. package llm
  2. import (
  3. "compress/gzip"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "io/fs"
  8. "log/slog"
  9. "os"
  10. "path/filepath"
  11. "runtime"
  12. "strings"
  13. "golang.org/x/exp/slices"
  14. "golang.org/x/sync/errgroup"
  15. "github.com/jmorganca/ollama/gpu"
  16. )
// Library names may contain an optional variant separated by '_'
// For example, "rocm_v6" and "rocm_v5" or "cpu" and "cpu_avx2"
// Any library without a variant is the lowest common denominator
//
// availableDynLibs maps each library name (including any variant suffix) to
// the path of its extracted library file. It is populated by nativeInit.
var availableDynLibs = map[string]string{}

// pathComponentCount is the number of '/'-separated components expected in an
// embedded payload path of the form
// llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY
const pathComponentCount = 7
  22. // getDynLibs returns an ordered list of LLM libraries to try, starting with the best
  23. func getDynLibs(gpuInfo gpu.GpuInfo) []string {
  24. // Short circuit if we know we're using the default built-in (darwin only)
  25. if gpuInfo.Library == "default" {
  26. return []string{"default"}
  27. }
  28. // TODO - temporary until we have multiple CPU variations for Darwin
  29. // Short circuit on darwin with metal only
  30. if len(availableDynLibs) == 1 {
  31. if _, onlyMetal := availableDynLibs["metal"]; onlyMetal {
  32. return []string{availableDynLibs["metal"]}
  33. }
  34. }
  35. exactMatch := ""
  36. dynLibs := []string{}
  37. altDynLibs := []string{}
  38. requested := gpuInfo.Library
  39. if gpuInfo.Variant != "" {
  40. requested += "_" + gpuInfo.Variant
  41. }
  42. // Try to find an exact match
  43. for cmp := range availableDynLibs {
  44. if requested == cmp {
  45. exactMatch = cmp
  46. dynLibs = []string{availableDynLibs[cmp]}
  47. break
  48. }
  49. }
  50. // Then for GPUs load alternates and sort the list for consistent load ordering
  51. if gpuInfo.Library != "cpu" {
  52. for cmp := range availableDynLibs {
  53. if gpuInfo.Library == strings.Split(cmp, "_")[0] && cmp != exactMatch {
  54. altDynLibs = append(altDynLibs, cmp)
  55. }
  56. }
  57. slices.Sort(altDynLibs)
  58. for _, altDynLib := range altDynLibs {
  59. dynLibs = append(dynLibs, availableDynLibs[altDynLib])
  60. }
  61. }
  62. // Load up the best CPU variant if not primary requested
  63. if gpuInfo.Library != "cpu" {
  64. variant := gpu.GetCPUVariant()
  65. // If no variant, then we fall back to default
  66. // If we have a variant, try that if we find an exact match
  67. // Attempting to run the wrong CPU instructions will panic the
  68. // process
  69. if variant != "" {
  70. for cmp := range availableDynLibs {
  71. if cmp == "cpu_"+variant {
  72. dynLibs = append(dynLibs, availableDynLibs[cmp])
  73. break
  74. }
  75. }
  76. } else {
  77. dynLibs = append(dynLibs, availableDynLibs["cpu"])
  78. }
  79. }
  80. // Finally, if we didn't find any matches, LCD CPU FTW
  81. if len(dynLibs) == 0 {
  82. dynLibs = []string{availableDynLibs["cpu"]}
  83. }
  84. slog.Debug(fmt.Sprintf("ordered list of LLM libraries to try %v", dynLibs))
  85. return dynLibs
  86. }
  87. func rocmDynLibPresent() bool {
  88. for dynLibName := range availableDynLibs {
  89. if strings.HasPrefix(dynLibName, "rocm") {
  90. return true
  91. }
  92. }
  93. return false
  94. }
  95. func nativeInit(workdir string) error {
  96. slog.Info("Extracting dynamic libraries...")
  97. if runtime.GOOS == "darwin" {
  98. err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal")
  99. if err != nil {
  100. if err == payloadMissing {
  101. // TODO perhaps consider this a hard failure on arm macs?
  102. slog.Info("ggml-meta.metal payload missing")
  103. return nil
  104. }
  105. return err
  106. }
  107. os.Setenv("GGML_METAL_PATH_RESOURCES", workdir)
  108. }
  109. libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/*/lib/*")
  110. if err != nil {
  111. if err == payloadMissing {
  112. slog.Info(fmt.Sprintf("%s", payloadMissing))
  113. return nil
  114. }
  115. return err
  116. }
  117. for _, lib := range libs {
  118. // The last dir component is the variant name
  119. variant := filepath.Base(filepath.Dir(lib))
  120. availableDynLibs[variant] = lib
  121. }
  122. if err := verifyDriverAccess(); err != nil {
  123. return err
  124. }
  125. // Report which dynamic libraries we have loaded to assist troubleshooting
  126. variants := make([]string, len(availableDynLibs))
  127. i := 0
  128. for variant := range availableDynLibs {
  129. variants[i] = variant
  130. i++
  131. }
  132. slog.Info(fmt.Sprintf("Dynamic LLM libraries %v", variants))
  133. slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY")
  134. return nil
  135. }
  136. func extractDynamicLibs(workDir, glob string) ([]string, error) {
  137. files, err := fs.Glob(libEmbed, glob)
  138. if err != nil || len(files) == 0 {
  139. return nil, payloadMissing
  140. }
  141. libs := []string{}
  142. // TODO consider making this idempotent with some sort of persistent directory (where we store models probably)
  143. // and tracking by version so we don't reexpand the files every time
  144. // Also maybe consider lazy loading only what is needed
  145. g := new(errgroup.Group)
  146. for _, file := range files {
  147. pathComps := strings.Split(file, "/")
  148. if len(pathComps) != pathComponentCount {
  149. slog.Error(fmt.Sprintf("unexpected payload components: %v", pathComps))
  150. continue
  151. }
  152. file := file
  153. g.Go(func() error {
  154. // llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY
  155. // Include the variant in the path to avoid conflicts between multiple server libs
  156. targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3])
  157. srcFile, err := libEmbed.Open(file)
  158. if err != nil {
  159. return fmt.Errorf("read payload %s: %v", file, err)
  160. }
  161. defer srcFile.Close()
  162. if err := os.MkdirAll(targetDir, 0o755); err != nil {
  163. return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
  164. }
  165. src := io.Reader(srcFile)
  166. filename := file
  167. if strings.HasSuffix(file, ".gz") {
  168. src, err = gzip.NewReader(src)
  169. if err != nil {
  170. return fmt.Errorf("decompress payload %s: %v", file, err)
  171. }
  172. filename = strings.TrimSuffix(filename, ".gz")
  173. }
  174. destFile := filepath.Join(targetDir, filepath.Base(filename))
  175. if strings.Contains(destFile, "server") {
  176. libs = append(libs, destFile)
  177. }
  178. _, err = os.Stat(destFile)
  179. switch {
  180. case errors.Is(err, os.ErrNotExist):
  181. destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
  182. if err != nil {
  183. return fmt.Errorf("write payload %s: %v", file, err)
  184. }
  185. defer destFile.Close()
  186. if _, err := io.Copy(destFile, src); err != nil {
  187. return fmt.Errorf("copy payload %s: %v", file, err)
  188. }
  189. case err != nil:
  190. return fmt.Errorf("stat payload %s: %v", file, err)
  191. }
  192. return nil
  193. })
  194. }
  195. return libs, g.Wait()
  196. }
  197. func extractPayloadFiles(workDir, glob string) error {
  198. files, err := fs.Glob(libEmbed, glob)
  199. if err != nil || len(files) == 0 {
  200. return payloadMissing
  201. }
  202. for _, file := range files {
  203. srcFile, err := libEmbed.Open(file)
  204. if err != nil {
  205. return fmt.Errorf("read payload %s: %v", file, err)
  206. }
  207. defer srcFile.Close()
  208. if err := os.MkdirAll(workDir, 0o755); err != nil {
  209. return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
  210. }
  211. src := io.Reader(srcFile)
  212. filename := file
  213. if strings.HasSuffix(file, ".gz") {
  214. src, err = gzip.NewReader(src)
  215. if err != nil {
  216. return fmt.Errorf("decompress payload %s: %v", file, err)
  217. }
  218. filename = strings.TrimSuffix(filename, ".gz")
  219. }
  220. destFile := filepath.Join(workDir, filepath.Base(filename))
  221. _, err = os.Stat(destFile)
  222. switch {
  223. case errors.Is(err, os.ErrNotExist):
  224. destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
  225. if err != nil {
  226. return fmt.Errorf("write payload %s: %v", file, err)
  227. }
  228. defer destFile.Close()
  229. if _, err := io.Copy(destFile, src); err != nil {
  230. return fmt.Errorf("copy payload %s: %v", file, err)
  231. }
  232. case err != nil:
  233. return fmt.Errorf("stat payload %s: %v", file, err)
  234. }
  235. }
  236. return nil
  237. }
  238. func verifyDriverAccess() error {
  239. if runtime.GOOS != "linux" {
  240. return nil
  241. }
  242. // Only check ROCm access if we have the dynamic lib loaded
  243. if rocmDynLibPresent() {
  244. // Verify we have permissions - either running as root, or we have group access to the driver
  245. fd, err := os.OpenFile("/dev/kfd", os.O_RDWR, 0666)
  246. if err != nil {
  247. if errors.Is(err, fs.ErrPermission) {
  248. return fmt.Errorf("Radeon card detected, but permissions not set up properly. Either run ollama as root, or add you user account to the render group.")
  249. } else if errors.Is(err, fs.ErrNotExist) {
  250. // expected behavior without a radeon card
  251. return nil
  252. }
  253. return fmt.Errorf("failed to check permission on /dev/kfd: %w", err)
  254. }
  255. fd.Close()
  256. }
  257. return nil
  258. }