payload_common.go 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251
  1. package llm
  2. import (
  3. "errors"
  4. "fmt"
  5. "io"
  6. "io/fs"
  7. "log"
  8. "os"
  9. "path/filepath"
  10. "runtime"
  11. "slices"
  12. "strings"
  13. "github.com/jmorganca/ollama/gpu"
  14. )
  15. // Libraries names may contain an optional variant separated by '_'
  16. // For example, "rocm_v6" and "rocm_v5" or "cpu" and "cpu_avx2"
  17. // Any library without a variant is the lowest common denominator
  18. var availableDynLibs = map[string]string{}
  19. const pathComponentCount = 6
  20. // getDynLibs returns an ordered list of LLM libraries to try, starting with the best
  21. func getDynLibs(gpuInfo gpu.GpuInfo) []string {
  22. // Short circuit if we know we're using the default built-in (darwin only)
  23. if gpuInfo.Library == "default" {
  24. return []string{"default"}
  25. }
  26. // TODO - temporary until we have multiple CPU variations for Darwin
  27. // Short circuit on darwin with metal only
  28. if len(availableDynLibs) == 1 {
  29. if _, onlyMetal := availableDynLibs["metal"]; onlyMetal {
  30. return []string{availableDynLibs["metal"]}
  31. }
  32. }
  33. exactMatch := ""
  34. dynLibs := []string{}
  35. altDynLibs := []string{}
  36. requested := gpuInfo.Library
  37. if gpuInfo.Variant != "" {
  38. requested += "_" + gpuInfo.Variant
  39. }
  40. // Try to find an exact match
  41. for cmp := range availableDynLibs {
  42. if requested == cmp {
  43. exactMatch = cmp
  44. dynLibs = []string{availableDynLibs[cmp]}
  45. break
  46. }
  47. }
  48. // Then for GPUs load alternates and sort the list for consistent load ordering
  49. if gpuInfo.Library != "cpu" {
  50. for cmp := range availableDynLibs {
  51. if gpuInfo.Library == strings.Split(cmp, "_")[0] && cmp != exactMatch {
  52. altDynLibs = append(altDynLibs, cmp)
  53. }
  54. }
  55. slices.Sort(altDynLibs)
  56. for _, altDynLib := range altDynLibs {
  57. dynLibs = append(dynLibs, availableDynLibs[altDynLib])
  58. }
  59. }
  60. // Load up the best CPU variant if not primary requested
  61. if gpuInfo.Library != "cpu" {
  62. variant := gpu.GetCPUVariant()
  63. // If no variant, then we fall back to default
  64. // If we have a variant, try that if we find an exact match
  65. // Attempting to run the wrong CPU instructions will panic the
  66. // process
  67. if variant != "" {
  68. for cmp := range availableDynLibs {
  69. if cmp == "cpu_"+variant {
  70. dynLibs = append(dynLibs, availableDynLibs[cmp])
  71. break
  72. }
  73. }
  74. } else {
  75. dynLibs = append(dynLibs, availableDynLibs["cpu"])
  76. }
  77. }
  78. // Finaly, if we didn't find any matches, LCD CPU FTW
  79. if len(dynLibs) == 0 {
  80. dynLibs = []string{availableDynLibs["cpu"]}
  81. }
  82. return dynLibs
  83. }
  84. func rocmDynLibPresent() bool {
  85. for dynLibName := range availableDynLibs {
  86. if strings.HasPrefix(dynLibName, "rocm") {
  87. return true
  88. }
  89. }
  90. return false
  91. }
  92. func nativeInit(workdir string) error {
  93. if runtime.GOOS == "darwin" {
  94. err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal")
  95. if err != nil {
  96. if err == payloadMissing {
  97. // TODO perhaps consider this a hard failure on arm macs?
  98. log.Printf("ggml-meta.metal payload missing")
  99. return nil
  100. }
  101. return err
  102. }
  103. os.Setenv("GGML_METAL_PATH_RESOURCES", workdir)
  104. }
  105. libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/lib/*")
  106. if err != nil {
  107. if err == payloadMissing {
  108. log.Printf("%s", payloadMissing)
  109. return nil
  110. }
  111. return err
  112. }
  113. for _, lib := range libs {
  114. // The last dir component is the variant name
  115. variant := filepath.Base(filepath.Dir(lib))
  116. availableDynLibs[variant] = lib
  117. }
  118. if err := verifyDriverAccess(); err != nil {
  119. return err
  120. }
  121. // Report which dynamic libraries we have loaded to assist troubleshooting
  122. variants := make([]string, len(availableDynLibs))
  123. i := 0
  124. for variant := range availableDynLibs {
  125. variants[i] = variant
  126. i++
  127. }
  128. log.Printf("Dynamic LLM libraries %v", variants)
  129. log.Printf("Override detection logic by setting OLLAMA_LLM_LIBRARY")
  130. return nil
  131. }
  132. func extractDynamicLibs(workDir, glob string) ([]string, error) {
  133. files, err := fs.Glob(libEmbed, glob)
  134. if err != nil || len(files) == 0 {
  135. return nil, payloadMissing
  136. }
  137. libs := []string{}
  138. for _, file := range files {
  139. pathComps := strings.Split(file, "/")
  140. if len(pathComps) != pathComponentCount {
  141. log.Printf("unexpected payload components: %v", pathComps)
  142. continue
  143. }
  144. // llama.cpp/build/$OS/$VARIANT/lib/$LIBRARY
  145. // Include the variant in the path to avoid conflicts between multiple server libs
  146. targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3])
  147. srcFile, err := libEmbed.Open(file)
  148. if err != nil {
  149. return nil, fmt.Errorf("read payload %s: %v", file, err)
  150. }
  151. defer srcFile.Close()
  152. if err := os.MkdirAll(targetDir, 0o755); err != nil {
  153. return nil, fmt.Errorf("create payload temp dir %s: %v", workDir, err)
  154. }
  155. destFile := filepath.Join(targetDir, filepath.Base(file))
  156. if strings.Contains(destFile, "server") {
  157. libs = append(libs, destFile)
  158. }
  159. _, err = os.Stat(destFile)
  160. switch {
  161. case errors.Is(err, os.ErrNotExist):
  162. destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
  163. if err != nil {
  164. return nil, fmt.Errorf("write payload %s: %v", file, err)
  165. }
  166. defer destFile.Close()
  167. if _, err := io.Copy(destFile, srcFile); err != nil {
  168. return nil, fmt.Errorf("copy payload %s: %v", file, err)
  169. }
  170. case err != nil:
  171. return nil, fmt.Errorf("stat payload %s: %v", file, err)
  172. }
  173. }
  174. return libs, nil
  175. }
  176. func extractPayloadFiles(workDir, glob string) error {
  177. files, err := fs.Glob(libEmbed, glob)
  178. if err != nil || len(files) == 0 {
  179. return payloadMissing
  180. }
  181. for _, file := range files {
  182. srcFile, err := libEmbed.Open(file)
  183. if err != nil {
  184. return fmt.Errorf("read payload %s: %v", file, err)
  185. }
  186. defer srcFile.Close()
  187. if err := os.MkdirAll(workDir, 0o755); err != nil {
  188. return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
  189. }
  190. destFile := filepath.Join(workDir, filepath.Base(file))
  191. _, err = os.Stat(destFile)
  192. switch {
  193. case errors.Is(err, os.ErrNotExist):
  194. destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
  195. if err != nil {
  196. return fmt.Errorf("write payload %s: %v", file, err)
  197. }
  198. defer destFile.Close()
  199. if _, err := io.Copy(destFile, srcFile); err != nil {
  200. return fmt.Errorf("copy payload %s: %v", file, err)
  201. }
  202. case err != nil:
  203. return fmt.Errorf("stat payload %s: %v", file, err)
  204. }
  205. }
  206. return nil
  207. }
  208. func verifyDriverAccess() error {
  209. if runtime.GOOS != "linux" {
  210. return nil
  211. }
  212. // Only check ROCm access if we have the dynamic lib loaded
  213. if rocmDynLibPresent() {
  214. // Verify we have permissions - either running as root, or we have group access to the driver
  215. fd, err := os.OpenFile("/dev/kfd", os.O_RDWR, 0666)
  216. if err != nil {
  217. if errors.Is(err, fs.ErrPermission) {
  218. return fmt.Errorf("Radeon card detected, but permissions not set up properly. Either run ollama as root, or add you user account to the render group.")
  219. } else if errors.Is(err, fs.ErrNotExist) {
  220. // expected behavior without a radeon card
  221. return nil
  222. }
  223. return fmt.Errorf("failed to check permission on /dev/kfd: %w", err)
  224. }
  225. fd.Close()
  226. }
  227. return nil
  228. }