common.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417
  1. package runners
  2. import (
  3. "compress/gzip"
  4. "context"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "io/fs"
  9. "log/slog"
  10. "os"
  11. "path/filepath"
  12. "runtime"
  13. "slices"
  14. "strconv"
  15. "strings"
  16. "sync"
  17. "syscall"
  18. "time"
  19. "golang.org/x/sync/errgroup"
  20. "github.com/ollama/ollama/api"
  21. "github.com/ollama/ollama/discover"
  22. "github.com/ollama/ollama/envconfig"
  23. )
  // binGlob matches the embedded payload files, which sit four path elements
  // deep in the payload filesystem ($OS/$GOARCH/$RUNNER/$FILE).
  const binGlob = "*/*/*/*"

  var (
  	// lock is held by Refresh and Cleanup while they read or write runnersDir.
  	lock sync.Mutex

  	// runnersDir is the resolved runners directory. It is empty until Refresh
  	// resolves it for the first time.
  	runnersDir string
  )
  31. type CompletionRequest struct {
  32. Prompt string
  33. Format string
  34. Images []ImageData
  35. Options *api.Options
  36. }
  // CompletionResponse is the value handed to the callback passed to
  // LLMServer.Completion.
  //
  // NOTE(review): the field notes below follow the field names and types only;
  // how they are populated is decided by the implementation, which is not
  // visible in this file.
  type CompletionResponse struct {
  	// Content is the response text.
  	Content string
  	// DoneReason is a textual reason accompanying Done.
  	DoneReason string
  	// Done is the completion flag.
  	Done bool
  	// PromptEvalCount and PromptEvalDuration are a count and a duration for
  	// prompt evaluation.
  	PromptEvalCount    int
  	PromptEvalDuration time.Duration
  	// EvalCount and EvalDuration are a count and a duration for evaluation.
  	EvalCount    int
  	EvalDuration time.Duration
  }
  46. type LLMServer interface {
  47. Ping(ctx context.Context) error
  48. WaitUntilRunning(ctx context.Context) error
  49. Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error
  50. Embedding(ctx context.Context, input string) ([]float32, error)
  51. Tokenize(ctx context.Context, content string) ([]int, error)
  52. Detokenize(ctx context.Context, tokens []int) (string, error)
  53. Close() error
  54. EstimatedVRAM() uint64 // Total VRAM across all GPUs
  55. EstimatedTotal() uint64
  56. EstimatedVRAMByGPU(gpuID string) uint64
  57. }
  58. // Return the location where runners are stored
  59. // If runners are payloads, this will either extract them
  60. // or refresh them if any have disappeared due to tmp cleaners
  61. func Refresh(payloadFS fs.FS) (string, error) {
  62. lock.Lock()
  63. defer lock.Unlock()
  64. var err error
  65. // Wire up extra logging on our first load
  66. if runnersDir == "" {
  67. defer func() {
  68. var runners []string
  69. for v := range GetAvailableServers(runnersDir) {
  70. runners = append(runners, v)
  71. }
  72. slog.Info("Dynamic LLM libraries", "runners", runners)
  73. slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY")
  74. }()
  75. }
  76. if hasPayloads(payloadFS) {
  77. if runnersDir == "" {
  78. runnersDir, err = extractRunners(payloadFS)
  79. } else {
  80. err = refreshRunners(payloadFS, runnersDir)
  81. }
  82. } else if runnersDir == "" {
  83. runnersDir, err = locateRunners()
  84. }
  85. return runnersDir, err
  86. }
  87. func Cleanup(payloadFS fs.FS) {
  88. lock.Lock()
  89. defer lock.Unlock()
  90. if hasPayloads(payloadFS) && runnersDir != "" {
  91. // We want to fully clean up the tmpdir parent of the payloads dir
  92. tmpDir := filepath.Clean(filepath.Join(runnersDir, ".."))
  93. slog.Debug("cleaning up", "dir", tmpDir)
  94. err := os.RemoveAll(tmpDir)
  95. if err != nil {
  96. slog.Warn("failed to clean up", "dir", tmpDir, "err", err)
  97. }
  98. }
  99. }
  100. func locateRunners() (string, error) {
  101. exe, err := os.Executable()
  102. if err != nil {
  103. return "", err
  104. }
  105. cwd, err := os.Getwd()
  106. if err != nil {
  107. return "", err
  108. }
  109. var paths []string
  110. for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe()), cwd} {
  111. paths = append(paths,
  112. root,
  113. filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH),
  114. filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH),
  115. )
  116. }
  117. // Try a few variations to improve developer experience when building from source in the local tree
  118. for _, path := range paths {
  119. candidate := filepath.Join(path, "lib", "ollama", "runners")
  120. if _, err := os.Stat(candidate); err == nil {
  121. return candidate, nil
  122. }
  123. }
  124. return "", fmt.Errorf("unable to locate runners in any search path %v", paths)
  125. }
  126. // Return true if we're carying nested payloads for the runners
  127. func hasPayloads(payloadFS fs.FS) bool {
  128. files, err := fs.Glob(payloadFS, binGlob)
  129. if err != nil || len(files) == 0 || (len(files) == 1 && strings.Contains(files[0], "placeholder")) {
  130. return false
  131. }
  132. return true
  133. }
  134. func extractRunners(payloadFS fs.FS) (string, error) {
  135. cleanupTmpDirs()
  136. tmpDir, err := os.MkdirTemp(envconfig.TmpDir(), "ollama")
  137. if err != nil {
  138. return "", fmt.Errorf("failed to generate tmp dir: %w", err)
  139. }
  140. // Track our pid so we can clean up orphaned tmpdirs
  141. n := filepath.Join(tmpDir, "ollama.pid")
  142. if err := os.WriteFile(n, []byte(strconv.Itoa(os.Getpid())), 0o644); err != nil {
  143. slog.Warn("failed to write pid file", "file", n, "error", err)
  144. }
  145. // We create a distinct subdirectory for payloads within the tmpdir
  146. // This will typically look like /tmp/ollama3208993108/runners on linux
  147. rDir := filepath.Join(tmpDir, "runners")
  148. slog.Info("extracting embedded files", "dir", rDir)
  149. return rDir, refreshRunners(payloadFS, rDir)
  150. }
  151. func refreshRunners(payloadFS fs.FS, rDir string) error {
  152. // extract or refresh server libraries
  153. err := extractFiles(payloadFS, rDir, binGlob)
  154. if err != nil {
  155. return fmt.Errorf("extract binaries: %v", err)
  156. }
  157. return nil
  158. }
  159. // extract extracts the embedded files to the target directory
  160. func extractFiles(payloadFS fs.FS, targetDir string, glob string) error {
  161. files, err := fs.Glob(payloadFS, glob)
  162. if err != nil || len(files) == 0 {
  163. // Should not happen
  164. return fmt.Errorf("extractFiles called without payload present")
  165. }
  166. if err := os.MkdirAll(targetDir, 0o755); err != nil {
  167. return fmt.Errorf("extractFiles could not mkdir %s: %v", targetDir, err)
  168. }
  169. g := new(errgroup.Group)
  170. // $OS/$GOARCH/$RUNNER/$FILE
  171. for _, file := range files {
  172. filename := file
  173. runner := filepath.Base(filepath.Dir(filename))
  174. slog.Debug("extracting", "runner", runner, "payload", filename)
  175. g.Go(func() error {
  176. srcf, err := payloadFS.Open(filename)
  177. if err != nil {
  178. return err
  179. }
  180. defer srcf.Close()
  181. src := io.Reader(srcf)
  182. if strings.HasSuffix(filename, ".gz") {
  183. src, err = gzip.NewReader(src)
  184. if err != nil {
  185. return fmt.Errorf("decompress payload %s: %v", filename, err)
  186. }
  187. filename = strings.TrimSuffix(filename, ".gz")
  188. }
  189. runnerDir := filepath.Join(targetDir, runner)
  190. if err := os.MkdirAll(runnerDir, 0o755); err != nil {
  191. return fmt.Errorf("extractFiles could not mkdir %s: %v", runnerDir, err)
  192. }
  193. base := filepath.Base(filename)
  194. destFilename := filepath.Join(runnerDir, base)
  195. _, err = os.Stat(destFilename)
  196. switch {
  197. case errors.Is(err, os.ErrNotExist):
  198. destFile, err := os.OpenFile(destFilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
  199. if err != nil {
  200. return fmt.Errorf("write payload %s: %v", filename, err)
  201. }
  202. defer destFile.Close()
  203. if _, err := io.Copy(destFile, src); err != nil {
  204. return fmt.Errorf("copy payload %s: %v", filename, err)
  205. }
  206. case err != nil:
  207. return fmt.Errorf("stat payload %s: %v", filename, err)
  208. }
  209. return nil
  210. })
  211. }
  212. err = g.Wait()
  213. if err != nil {
  214. slog.Error("failed to extract files", "error", err)
  215. // If we fail to extract, the payload dir is most likely unusable, so cleanup whatever we extracted
  216. err := os.RemoveAll(targetDir)
  217. if err != nil {
  218. slog.Warn("failed to cleanup incomplete payload dir", "dir", targetDir, "error", err)
  219. }
  220. return err
  221. }
  222. return nil
  223. }
  224. // Best effort to clean up prior tmpdirs
  225. func cleanupTmpDirs() {
  226. tmpDir := envconfig.TmpDir()
  227. if tmpDir == "" {
  228. tmpDir = os.TempDir()
  229. }
  230. matches, err := filepath.Glob(filepath.Join(tmpDir, "ollama*", "ollama.pid"))
  231. if err != nil {
  232. return
  233. }
  234. for _, match := range matches {
  235. raw, err := os.ReadFile(match)
  236. if errors.Is(err, os.ErrNotExist) {
  237. slog.Debug("not a ollama runtime directory, skipping", "path", match)
  238. continue
  239. } else if err != nil {
  240. slog.Warn("could not read ollama.pid, skipping", "path", match, "error", err)
  241. continue
  242. }
  243. pid, err := strconv.Atoi(string(raw))
  244. if err != nil {
  245. slog.Warn("invalid pid, skipping", "path", match, "error", err)
  246. continue
  247. }
  248. p, err := os.FindProcess(pid)
  249. if err == nil && !errors.Is(p.Signal(syscall.Signal(0)), os.ErrProcessDone) {
  250. slog.Warn("process still running, skipping", "pid", pid, "path", match)
  251. continue
  252. }
  253. if err := os.Remove(match); err != nil {
  254. slog.Warn("could not cleanup stale pidfile", "path", match, "error", err)
  255. }
  256. runners := filepath.Join(filepath.Dir(match), "runners")
  257. if err := os.RemoveAll(runners); err != nil {
  258. slog.Warn("could not cleanup stale runners", "path", runners, "error", err)
  259. }
  260. if err := os.Remove(filepath.Dir(match)); err != nil {
  261. slog.Warn("could not cleanup stale tmpdir", "path", filepath.Dir(match), "error", err)
  262. }
  263. }
  264. }
  // GetAvailableServers returns the runners found under payloadsDir, keyed by
  // runner name with the runner's directory as the value.
  //
  // A runner is any direct subdirectory of payloadsDir that contains at least
  // one entry whose name starts with "ollama_". Directory names are the name of
  // the runner and may contain an optional variant prefixed with '_' as the
  // separator. For example, "cuda_v11" and "cuda_v12" or "cpu" and "cpu_avx2".
  // Any library without a variant is the lowest common denominator.
  //
  // It returns nil when payloadsDir is empty or the glob pattern is rejected,
  // and an empty map when nothing matches.
  func GetAvailableServers(payloadsDir string) map[string]string {
  	if payloadsDir == "" {
  		slog.Error("empty runner dir")
  		return nil
  	}

  	// glob payloadsDir for files that start with ollama_
  	pattern := filepath.Join(payloadsDir, "*", "ollama_*")

  	files, err := filepath.Glob(pattern)
  	if err != nil {
  		slog.Debug("could not glob", "pattern", pattern, "error", err)
  		return nil
  	}

  	servers := make(map[string]string, len(files))
  	for _, file := range files {
  		slog.Debug("availableServers : found", "file", file)
  		dir := filepath.Dir(file)
  		servers[filepath.Base(dir)] = dir
  	}
  	return servers
  }
  288. // serversForGpu returns a list of compatible servers give the provided GPU
  289. // info, ordered by performance. assumes Init() has been called
  290. // TODO - switch to metadata based mapping
  291. func ServersForGpu(info discover.GpuInfo) []string {
  292. // glob workDir for files that start with ollama_
  293. availableServers := GetAvailableServers(runnersDir)
  294. requested := info.Library
  295. if info.Variant != discover.CPUCapabilityNone.String() {
  296. requested += "_" + info.Variant
  297. }
  298. servers := []string{}
  299. // exact match first
  300. for a := range availableServers {
  301. if a == requested {
  302. servers = []string{a}
  303. if a == "metal" {
  304. return servers
  305. }
  306. break
  307. }
  308. }
  309. alt := []string{}
  310. // Then for GPUs load alternates and sort the list for consistent load ordering
  311. if info.Library != "cpu" {
  312. for a := range availableServers {
  313. if info.Library == strings.Split(a, "_")[0] && a != requested {
  314. alt = append(alt, a)
  315. }
  316. }
  317. slices.Sort(alt)
  318. servers = append(servers, alt...)
  319. }
  320. if !(runtime.GOOS == "darwin" && runtime.GOARCH == "arm64") {
  321. // Load up the best CPU variant if not primary requested
  322. if info.Library != "cpu" {
  323. variant := discover.GetCPUCapability()
  324. // If no variant, then we fall back to default
  325. // If we have a variant, try that if we find an exact match
  326. // Attempting to run the wrong CPU instructions will panic the
  327. // process
  328. if variant != discover.CPUCapabilityNone {
  329. for cmp := range availableServers {
  330. if cmp == "cpu_"+variant.String() {
  331. servers = append(servers, cmp)
  332. break
  333. }
  334. }
  335. } else {
  336. servers = append(servers, "cpu")
  337. }
  338. }
  339. if len(servers) == 0 {
  340. servers = []string{"cpu"}
  341. }
  342. }
  343. return servers
  344. }
  345. // Return the optimal server for this CPU architecture
  346. func ServerForCpu() string {
  347. if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
  348. return "metal"
  349. }
  350. variant := discover.GetCPUCapability()
  351. availableServers := GetAvailableServers(runnersDir)
  352. if variant != discover.CPUCapabilityNone {
  353. for cmp := range availableServers {
  354. if cmp == "cpu_"+variant.String() {
  355. return cmp
  356. }
  357. }
  358. }
  359. return "cpu"
  360. }