common.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384
  1. package runners
  2. import (
  3. "compress/gzip"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "io/fs"
  8. "log/slog"
  9. "os"
  10. "path/filepath"
  11. "runtime"
  12. "slices"
  13. "strconv"
  14. "strings"
  15. "sync"
  16. "syscall"
  17. "golang.org/x/sync/errgroup"
  18. "github.com/ollama/ollama/envconfig"
  19. "github.com/ollama/ollama/gpu"
  20. )
  // binGlob matches the embedded runner payloads, which are laid out as
  // $OS/$GOARCH/$RUNNER/$FILE inside the payload filesystem.
  const binGlob = "*/*/*/*"

  var (
  	// lock is held by Refresh and Cleanup while they work with runnersDir.
  	lock sync.Mutex

  	// runnersDir is the resolved runners directory. It stays empty until the
  	// first Refresh assigns it.
  	runnersDir string
  )
  28. // Return the location where runners are stored
  29. // If runners are payloads, this will either extract them
  30. // or refresh them if any have disappeared due to tmp cleaners
  31. func Refresh(payloadFS fs.FS) (string, error) {
  32. lock.Lock()
  33. defer lock.Unlock()
  34. var err error
  35. // Wire up extra logging on our first load
  36. if runnersDir == "" {
  37. defer func() {
  38. var runners []string
  39. for v := range GetAvailableServers(runnersDir) {
  40. runners = append(runners, v)
  41. }
  42. slog.Info("Dynamic LLM libraries", "runners", runners)
  43. slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY")
  44. }()
  45. }
  46. if hasPayloads(payloadFS) {
  47. if runnersDir == "" {
  48. runnersDir, err = extractRunners(payloadFS)
  49. } else {
  50. err = refreshRunners(payloadFS, runnersDir)
  51. }
  52. } else if runnersDir == "" {
  53. runnersDir, err = locateRunners()
  54. }
  55. return runnersDir, err
  56. }
  57. func Cleanup(payloadFS fs.FS) {
  58. lock.Lock()
  59. defer lock.Unlock()
  60. if hasPayloads(payloadFS) && runnersDir != "" {
  61. // We want to fully clean up the tmpdir parent of the payloads dir
  62. tmpDir := filepath.Clean(filepath.Join(runnersDir, ".."))
  63. slog.Debug("cleaning up", "dir", tmpDir)
  64. err := os.RemoveAll(tmpDir)
  65. if err != nil {
  66. slog.Warn("failed to clean up", "dir", tmpDir, "err", err)
  67. }
  68. }
  69. }
  70. func locateRunners() (string, error) {
  71. exe, err := os.Executable()
  72. if err != nil {
  73. return "", err
  74. }
  75. cwd, err := os.Getwd()
  76. if err != nil {
  77. return "", err
  78. }
  79. var paths []string
  80. for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe()), cwd} {
  81. paths = append(paths,
  82. root,
  83. filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH),
  84. filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH),
  85. )
  86. }
  87. // Try a few variations to improve developer experience when building from source in the local tree
  88. for _, path := range paths {
  89. candidate := filepath.Join(path, "lib", "ollama", "runners")
  90. if _, err := os.Stat(candidate); err == nil {
  91. return candidate, nil
  92. }
  93. }
  94. return "", fmt.Errorf("unable to locate runners in any search path %v", paths)
  95. }
  96. // Return true if we're carying nested payloads for the runners
  97. func hasPayloads(payloadFS fs.FS) bool {
  98. files, err := fs.Glob(payloadFS, binGlob)
  99. if err != nil || len(files) == 0 || (len(files) == 1 && strings.Contains(files[0], "placeholder")) {
  100. return false
  101. }
  102. return true
  103. }
  104. func extractRunners(payloadFS fs.FS) (string, error) {
  105. cleanupTmpDirs()
  106. tmpDir, err := os.MkdirTemp(envconfig.TempDir(), "ollama")
  107. if err != nil {
  108. return "", fmt.Errorf("failed to generate tmp dir: %w", err)
  109. }
  110. // Track our pid so we can clean up orphaned tmpdirs
  111. n := filepath.Join(tmpDir, "ollama.pid")
  112. if err := os.WriteFile(n, []byte(strconv.Itoa(os.Getpid())), 0o644); err != nil {
  113. slog.Warn("failed to write pid file", "file", n, "error", err)
  114. }
  115. // We create a distinct subdirectory for payloads within the tmpdir
  116. // This will typically look like /tmp/ollama3208993108/runners on linux
  117. rDir := filepath.Join(tmpDir, "runners")
  118. slog.Info("extracting embedded files", "dir", rDir)
  119. return rDir, refreshRunners(payloadFS, rDir)
  120. }
  121. func refreshRunners(payloadFS fs.FS, rDir string) error {
  122. // extract or refresh server libraries
  123. err := extractFiles(payloadFS, rDir, binGlob)
  124. if err != nil {
  125. return fmt.Errorf("extract binaries: %v", err)
  126. }
  127. return nil
  128. }
  129. // extract extracts the embedded files to the target directory
  130. func extractFiles(payloadFS fs.FS, targetDir string, glob string) error {
  131. files, err := fs.Glob(payloadFS, glob)
  132. if err != nil || len(files) == 0 {
  133. // Should not happen
  134. return fmt.Errorf("extractFiles called without payload present")
  135. }
  136. if err := os.MkdirAll(targetDir, 0o755); err != nil {
  137. return fmt.Errorf("extractFiles could not mkdir %s: %v", targetDir, err)
  138. }
  139. g := new(errgroup.Group)
  140. // $OS/$GOARCH/$RUNNER/$FILE
  141. for _, file := range files {
  142. filename := file
  143. runner := filepath.Base(filepath.Dir(filename))
  144. slog.Debug("extracting", "runner", runner, "payload", filename)
  145. g.Go(func() error {
  146. srcf, err := payloadFS.Open(filename)
  147. if err != nil {
  148. return err
  149. }
  150. defer srcf.Close()
  151. src := io.Reader(srcf)
  152. if strings.HasSuffix(filename, ".gz") {
  153. src, err = gzip.NewReader(src)
  154. if err != nil {
  155. return fmt.Errorf("decompress payload %s: %v", filename, err)
  156. }
  157. filename = strings.TrimSuffix(filename, ".gz")
  158. }
  159. runnerDir := filepath.Join(targetDir, runner)
  160. if err := os.MkdirAll(runnerDir, 0o755); err != nil {
  161. return fmt.Errorf("extractFiles could not mkdir %s: %v", runnerDir, err)
  162. }
  163. base := filepath.Base(filename)
  164. destFilename := filepath.Join(runnerDir, base)
  165. _, err = os.Stat(destFilename)
  166. switch {
  167. case errors.Is(err, os.ErrNotExist):
  168. destFile, err := os.OpenFile(destFilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
  169. if err != nil {
  170. return fmt.Errorf("write payload %s: %v", filename, err)
  171. }
  172. defer destFile.Close()
  173. if _, err := io.Copy(destFile, src); err != nil {
  174. return fmt.Errorf("copy payload %s: %v", filename, err)
  175. }
  176. case err != nil:
  177. return fmt.Errorf("stat payload %s: %v", filename, err)
  178. }
  179. return nil
  180. })
  181. }
  182. err = g.Wait()
  183. if err != nil {
  184. slog.Error("failed to extract files", "error", err)
  185. // If we fail to extract, the payload dir is most likely unusable, so cleanup whatever we extracted
  186. err := os.RemoveAll(targetDir)
  187. if err != nil {
  188. slog.Warn("failed to cleanup incomplete payload dir", "dir", targetDir, "error", err)
  189. }
  190. return err
  191. }
  192. return nil
  193. }
  194. // Best effort to clean up prior tmpdirs
  195. func cleanupTmpDirs() {
  196. tmpDir := envconfig.TempDir()
  197. if tmpDir == "" {
  198. tmpDir = os.TempDir()
  199. }
  200. matches, err := filepath.Glob(filepath.Join(tmpDir, "ollama*", "ollama.pid"))
  201. if err != nil {
  202. return
  203. }
  204. for _, match := range matches {
  205. raw, err := os.ReadFile(match)
  206. if errors.Is(err, os.ErrNotExist) {
  207. slog.Debug("not a ollama runtime directory, skipping", "path", match)
  208. continue
  209. } else if err != nil {
  210. slog.Warn("could not read ollama.pid, skipping", "path", match, "error", err)
  211. continue
  212. }
  213. pid, err := strconv.Atoi(string(raw))
  214. if err != nil {
  215. slog.Warn("invalid pid, skipping", "path", match, "error", err)
  216. continue
  217. }
  218. p, err := os.FindProcess(pid)
  219. if err == nil && !errors.Is(p.Signal(syscall.Signal(0)), os.ErrProcessDone) {
  220. slog.Warn("process still running, skipping", "pid", pid, "path", match)
  221. continue
  222. }
  223. if err := os.Remove(match); err != nil {
  224. slog.Warn("could not cleanup stale pidfile", "path", match, "error", err)
  225. }
  226. runners := filepath.Join(filepath.Dir(match), "runners")
  227. if err := os.RemoveAll(runners); err != nil {
  228. slog.Warn("could not cleanup stale runners", "path", runners, "error", err)
  229. }
  230. if err := os.Remove(filepath.Dir(match)); err != nil {
  231. slog.Warn("could not cleanup stale tmpdir", "path", filepath.Dir(match), "error", err)
  232. }
  233. }
  234. }
  // GetAvailableServers returns the runners found under payloadsDir, keyed by
  // runner name with the runner's directory as the value. A runner is any
  // immediate subdirectory holding at least one entry whose name starts with
  // "ollama_".
  //
  // Directory names are the name of the runner and may contain an optional
  // variant prefixed with '_' as the separator. For example, "cuda_v11" and
  // "cuda_v12" or "cpu" and "cpu_avx2". Any library without a variant is the
  // lowest common denominator.
  //
  // nil is returned when payloadsDir is empty or the glob fails.
  func GetAvailableServers(payloadsDir string) map[string]string {
  	if payloadsDir == "" {
  		slog.Error("empty runner dir")
  		return nil
  	}

  	// glob payloadsDir for files that start with ollama_
  	pattern := filepath.Join(payloadsDir, "*", "ollama_*")
  	matches, err := filepath.Glob(pattern)
  	if err != nil {
  		slog.Debug("could not glob", "pattern", pattern, "error", err)
  		return nil
  	}

  	found := make(map[string]string, len(matches))
  	for _, match := range matches {
  		slog.Debug("availableServers : found", "file", match)
  		dir := filepath.Dir(match)
  		found[filepath.Base(dir)] = dir
  	}
  	return found
  }
  258. // serversForGpu returns a list of compatible servers give the provided GPU
  259. // info, ordered by performance. assumes Init() has been called
  260. // TODO - switch to metadata based mapping
  261. func ServersForGpu(info gpu.GpuInfo) []string {
  262. // glob workDir for files that start with ollama_
  263. availableServers := GetAvailableServers(runnersDir)
  264. requested := info.Library
  265. if info.Variant != gpu.CPUCapabilityNone.String() {
  266. requested += "_" + info.Variant
  267. }
  268. servers := []string{}
  269. // exact match first
  270. for a := range availableServers {
  271. if a == requested {
  272. servers = []string{a}
  273. if a == "metal" {
  274. return servers
  275. }
  276. break
  277. }
  278. }
  279. alt := []string{}
  280. // Then for GPUs load alternates and sort the list for consistent load ordering
  281. if info.Library != "cpu" {
  282. for a := range availableServers {
  283. if info.Library == strings.Split(a, "_")[0] && a != requested {
  284. alt = append(alt, a)
  285. }
  286. }
  287. slices.Sort(alt)
  288. servers = append(servers, alt...)
  289. }
  290. if !(runtime.GOOS == "darwin" && runtime.GOARCH == "arm64") {
  291. // Load up the best CPU variant if not primary requested
  292. if info.Library != "cpu" {
  293. variant := gpu.GetCPUCapability()
  294. // If no variant, then we fall back to default
  295. // If we have a variant, try that if we find an exact match
  296. // Attempting to run the wrong CPU instructions will panic the
  297. // process
  298. if variant != gpu.CPUCapabilityNone {
  299. for cmp := range availableServers {
  300. if cmp == "cpu_"+variant.String() {
  301. servers = append(servers, cmp)
  302. break
  303. }
  304. }
  305. } else {
  306. servers = append(servers, "cpu")
  307. }
  308. }
  309. if len(servers) == 0 {
  310. servers = []string{"cpu"}
  311. }
  312. }
  313. return servers
  314. }
  315. // Return the optimal server for this CPU architecture
  316. func ServerForCpu() string {
  317. if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
  318. return "metal"
  319. }
  320. variant := gpu.GetCPUCapability()
  321. availableServers := GetAvailableServers(runnersDir)
  322. if variant != gpu.CPUCapabilityNone {
  323. for cmp := range availableServers {
  324. if cmp == "cpu_"+variant.String() {
  325. return cmp
  326. }
  327. }
  328. }
  329. return "cpu"
  330. }