|
@@ -0,0 +1,384 @@
|
|
|
+package runners
|
|
|
+
|
|
|
+import (
|
|
|
+ "compress/gzip"
|
|
|
+ "errors"
|
|
|
+ "fmt"
|
|
|
+ "io"
|
|
|
+ "io/fs"
|
|
|
+ "log/slog"
|
|
|
+ "os"
|
|
|
+ "path/filepath"
|
|
|
+ "runtime"
|
|
|
+ "slices"
|
|
|
+ "strconv"
|
|
|
+ "strings"
|
|
|
+ "sync"
|
|
|
+ "syscall"
|
|
|
+
|
|
|
+ "golang.org/x/sync/errgroup"
|
|
|
+
|
|
|
+ "github.com/ollama/ollama/envconfig"
|
|
|
+ "github.com/ollama/ollama/gpu"
|
|
|
+)
|
|
|
+
|
|
|
+const (
|
|
|
+ binGlob = "*/*/*/*"
|
|
|
+)
|
|
|
+
|
|
|
+var (
|
|
|
+ lock sync.Mutex
|
|
|
+ runnersDir = ""
|
|
|
+)
|
|
|
+
|
|
|
+// Return the location where runners are stored
|
|
|
+// If runners are payloads, this will either extract them
|
|
|
+// or refresh them if any have disappeared due to tmp cleaners
|
|
|
+func Refresh(payloadFS fs.FS) (string, error) {
|
|
|
+ lock.Lock()
|
|
|
+ defer lock.Unlock()
|
|
|
+ var err error
|
|
|
+
|
|
|
+ // Wire up extra logging on our first load
|
|
|
+ if runnersDir == "" {
|
|
|
+ defer func() {
|
|
|
+ var runners []string
|
|
|
+ for v := range GetAvailableServers(runnersDir) {
|
|
|
+ runners = append(runners, v)
|
|
|
+ }
|
|
|
+ slog.Info("Dynamic LLM libraries", "runners", runners)
|
|
|
+ slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY")
|
|
|
+ }()
|
|
|
+ }
|
|
|
+
|
|
|
+ if hasPayloads(payloadFS) {
|
|
|
+ if runnersDir == "" {
|
|
|
+ runnersDir, err = extractRunners(payloadFS)
|
|
|
+ } else {
|
|
|
+ err = refreshRunners(payloadFS, runnersDir)
|
|
|
+ }
|
|
|
+ } else if runnersDir == "" {
|
|
|
+ runnersDir, err = locateRunners()
|
|
|
+ }
|
|
|
+
|
|
|
+ return runnersDir, err
|
|
|
+}
|
|
|
+
|
|
|
+func Cleanup(payloadFS fs.FS) {
|
|
|
+ lock.Lock()
|
|
|
+ defer lock.Unlock()
|
|
|
+ if hasPayloads(payloadFS) && runnersDir != "" {
|
|
|
+ // We want to fully clean up the tmpdir parent of the payloads dir
|
|
|
+ tmpDir := filepath.Clean(filepath.Join(runnersDir, ".."))
|
|
|
+ slog.Debug("cleaning up", "dir", tmpDir)
|
|
|
+ err := os.RemoveAll(tmpDir)
|
|
|
+ if err != nil {
|
|
|
+ slog.Warn("failed to clean up", "dir", tmpDir, "err", err)
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+func locateRunners() (string, error) {
|
|
|
+ exe, err := os.Executable()
|
|
|
+ if err != nil {
|
|
|
+ return "", err
|
|
|
+ }
|
|
|
+
|
|
|
+ cwd, err := os.Getwd()
|
|
|
+ if err != nil {
|
|
|
+ return "", err
|
|
|
+ }
|
|
|
+
|
|
|
+ var paths []string
|
|
|
+ for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe()), cwd} {
|
|
|
+ paths = append(paths,
|
|
|
+ root,
|
|
|
+ filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH),
|
|
|
+ filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH),
|
|
|
+ )
|
|
|
+ }
|
|
|
+
|
|
|
+ // Try a few variations to improve developer experience when building from source in the local tree
|
|
|
+ for _, path := range paths {
|
|
|
+ candidate := filepath.Join(path, "lib", "ollama", "runners")
|
|
|
+ if _, err := os.Stat(candidate); err == nil {
|
|
|
+ return candidate, nil
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return "", fmt.Errorf("unable to locate runners in any search path %v", paths)
|
|
|
+}
|
|
|
+
|
|
|
+// Return true if we're carying nested payloads for the runners
|
|
|
+func hasPayloads(payloadFS fs.FS) bool {
|
|
|
+ files, err := fs.Glob(payloadFS, binGlob)
|
|
|
+ if err != nil || len(files) == 0 || (len(files) == 1 && strings.Contains(files[0], "placeholder")) {
|
|
|
+ return false
|
|
|
+ }
|
|
|
+ return true
|
|
|
+}
|
|
|
+
|
|
|
+func extractRunners(payloadFS fs.FS) (string, error) {
|
|
|
+ cleanupTmpDirs()
|
|
|
+ tmpDir, err := os.MkdirTemp(envconfig.TmpDir(), "ollama")
|
|
|
+ if err != nil {
|
|
|
+ return "", fmt.Errorf("failed to generate tmp dir: %w", err)
|
|
|
+ }
|
|
|
+ // Track our pid so we can clean up orphaned tmpdirs
|
|
|
+ n := filepath.Join(tmpDir, "ollama.pid")
|
|
|
+ if err := os.WriteFile(n, []byte(strconv.Itoa(os.Getpid())), 0o644); err != nil {
|
|
|
+ slog.Warn("failed to write pid file", "file", n, "error", err)
|
|
|
+ }
|
|
|
+ // We create a distinct subdirectory for payloads within the tmpdir
|
|
|
+ // This will typically look like /tmp/ollama3208993108/runners on linux
|
|
|
+ rDir := filepath.Join(tmpDir, "runners")
|
|
|
+
|
|
|
+ slog.Info("extracting embedded files", "dir", rDir)
|
|
|
+ return rDir, refreshRunners(payloadFS, rDir)
|
|
|
+}
|
|
|
+
|
|
|
+func refreshRunners(payloadFS fs.FS, rDir string) error {
|
|
|
+ // extract or refresh server libraries
|
|
|
+ err := extractFiles(payloadFS, rDir, binGlob)
|
|
|
+ if err != nil {
|
|
|
+ return fmt.Errorf("extract binaries: %v", err)
|
|
|
+ }
|
|
|
+ return nil
|
|
|
+}
|
|
|
+
|
|
|
+// extract extracts the embedded files to the target directory
|
|
|
+func extractFiles(payloadFS fs.FS, targetDir string, glob string) error {
|
|
|
+ files, err := fs.Glob(payloadFS, glob)
|
|
|
+ if err != nil || len(files) == 0 {
|
|
|
+ // Should not happen
|
|
|
+ return fmt.Errorf("extractFiles called without payload present")
|
|
|
+ }
|
|
|
+
|
|
|
+ if err := os.MkdirAll(targetDir, 0o755); err != nil {
|
|
|
+ return fmt.Errorf("extractFiles could not mkdir %s: %v", targetDir, err)
|
|
|
+ }
|
|
|
+
|
|
|
+ g := new(errgroup.Group)
|
|
|
+
|
|
|
+ // $OS/$GOARCH/$RUNNER/$FILE
|
|
|
+ for _, file := range files {
|
|
|
+ filename := file
|
|
|
+
|
|
|
+ runner := filepath.Base(filepath.Dir(filename))
|
|
|
+
|
|
|
+ slog.Debug("extracting", "runner", runner, "payload", filename)
|
|
|
+
|
|
|
+ g.Go(func() error {
|
|
|
+ srcf, err := payloadFS.Open(filename)
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ defer srcf.Close()
|
|
|
+
|
|
|
+ src := io.Reader(srcf)
|
|
|
+ if strings.HasSuffix(filename, ".gz") {
|
|
|
+ src, err = gzip.NewReader(src)
|
|
|
+ if err != nil {
|
|
|
+ return fmt.Errorf("decompress payload %s: %v", filename, err)
|
|
|
+ }
|
|
|
+ filename = strings.TrimSuffix(filename, ".gz")
|
|
|
+ }
|
|
|
+
|
|
|
+ runnerDir := filepath.Join(targetDir, runner)
|
|
|
+ if err := os.MkdirAll(runnerDir, 0o755); err != nil {
|
|
|
+ return fmt.Errorf("extractFiles could not mkdir %s: %v", runnerDir, err)
|
|
|
+ }
|
|
|
+
|
|
|
+ base := filepath.Base(filename)
|
|
|
+ destFilename := filepath.Join(runnerDir, base)
|
|
|
+
|
|
|
+ _, err = os.Stat(destFilename)
|
|
|
+ switch {
|
|
|
+ case errors.Is(err, os.ErrNotExist):
|
|
|
+ destFile, err := os.OpenFile(destFilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
|
|
|
+ if err != nil {
|
|
|
+ return fmt.Errorf("write payload %s: %v", filename, err)
|
|
|
+ }
|
|
|
+ defer destFile.Close()
|
|
|
+ if _, err := io.Copy(destFile, src); err != nil {
|
|
|
+ return fmt.Errorf("copy payload %s: %v", filename, err)
|
|
|
+ }
|
|
|
+ case err != nil:
|
|
|
+ return fmt.Errorf("stat payload %s: %v", filename, err)
|
|
|
+ }
|
|
|
+ return nil
|
|
|
+ })
|
|
|
+ }
|
|
|
+
|
|
|
+ err = g.Wait()
|
|
|
+ if err != nil {
|
|
|
+ slog.Error("failed to extract files", "error", err)
|
|
|
+ // If we fail to extract, the payload dir is most likely unusable, so cleanup whatever we extracted
|
|
|
+ err := os.RemoveAll(targetDir)
|
|
|
+ if err != nil {
|
|
|
+ slog.Warn("failed to cleanup incomplete payload dir", "dir", targetDir, "error", err)
|
|
|
+ }
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ return nil
|
|
|
+}
|
|
|
+
|
|
|
+// Best effort to clean up prior tmpdirs
|
|
|
+func cleanupTmpDirs() {
|
|
|
+ tmpDir := envconfig.TmpDir()
|
|
|
+ if tmpDir == "" {
|
|
|
+ tmpDir = os.TempDir()
|
|
|
+ }
|
|
|
+ matches, err := filepath.Glob(filepath.Join(tmpDir, "ollama*", "ollama.pid"))
|
|
|
+ if err != nil {
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ for _, match := range matches {
|
|
|
+ raw, err := os.ReadFile(match)
|
|
|
+ if errors.Is(err, os.ErrNotExist) {
|
|
|
+ slog.Debug("not a ollama runtime directory, skipping", "path", match)
|
|
|
+ continue
|
|
|
+ } else if err != nil {
|
|
|
+ slog.Warn("could not read ollama.pid, skipping", "path", match, "error", err)
|
|
|
+ continue
|
|
|
+ }
|
|
|
+
|
|
|
+ pid, err := strconv.Atoi(string(raw))
|
|
|
+ if err != nil {
|
|
|
+ slog.Warn("invalid pid, skipping", "path", match, "error", err)
|
|
|
+ continue
|
|
|
+ }
|
|
|
+
|
|
|
+ p, err := os.FindProcess(pid)
|
|
|
+ if err == nil && !errors.Is(p.Signal(syscall.Signal(0)), os.ErrProcessDone) {
|
|
|
+ slog.Warn("process still running, skipping", "pid", pid, "path", match)
|
|
|
+ continue
|
|
|
+ }
|
|
|
+
|
|
|
+ if err := os.Remove(match); err != nil {
|
|
|
+ slog.Warn("could not cleanup stale pidfile", "path", match, "error", err)
|
|
|
+ }
|
|
|
+
|
|
|
+ runners := filepath.Join(filepath.Dir(match), "runners")
|
|
|
+ if err := os.RemoveAll(runners); err != nil {
|
|
|
+ slog.Warn("could not cleanup stale runners", "path", runners, "error", err)
|
|
|
+ }
|
|
|
+
|
|
|
+ if err := os.Remove(filepath.Dir(match)); err != nil {
|
|
|
+ slog.Warn("could not cleanup stale tmpdir", "path", filepath.Dir(match), "error", err)
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+// directory names are the name of the runner and may contain an optional
|
|
|
+// variant prefixed with '_' as the separator. For example, "cuda_v11" and
|
|
|
+// "cuda_v12" or "cpu" and "cpu_avx2". Any library without a variant is the
|
|
|
+// lowest common denominator
|
|
|
+func GetAvailableServers(payloadsDir string) map[string]string {
|
|
|
+ if payloadsDir == "" {
|
|
|
+ slog.Error("empty runner dir")
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+
|
|
|
+ // glob payloadsDir for files that start with ollama_
|
|
|
+ pattern := filepath.Join(payloadsDir, "*", "ollama_*")
|
|
|
+
|
|
|
+ files, err := filepath.Glob(pattern)
|
|
|
+ if err != nil {
|
|
|
+ slog.Debug("could not glob", "pattern", pattern, "error", err)
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+
|
|
|
+ servers := make(map[string]string)
|
|
|
+ for _, file := range files {
|
|
|
+ slog.Debug("availableServers : found", "file", file)
|
|
|
+ servers[filepath.Base(filepath.Dir(file))] = filepath.Dir(file)
|
|
|
+ }
|
|
|
+
|
|
|
+ return servers
|
|
|
+}
|
|
|
+
|
|
|
+// serversForGpu returns a list of compatible servers give the provided GPU
|
|
|
+// info, ordered by performance. assumes Init() has been called
|
|
|
+// TODO - switch to metadata based mapping
|
|
|
+func ServersForGpu(info gpu.GpuInfo) []string {
|
|
|
+ // glob workDir for files that start with ollama_
|
|
|
+ availableServers := GetAvailableServers(runnersDir)
|
|
|
+ requested := info.Library
|
|
|
+ if info.Variant != gpu.CPUCapabilityNone.String() {
|
|
|
+ requested += "_" + info.Variant
|
|
|
+ }
|
|
|
+
|
|
|
+ servers := []string{}
|
|
|
+
|
|
|
+ // exact match first
|
|
|
+ for a := range availableServers {
|
|
|
+ if a == requested {
|
|
|
+ servers = []string{a}
|
|
|
+
|
|
|
+ if a == "metal" {
|
|
|
+ return servers
|
|
|
+ }
|
|
|
+
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ alt := []string{}
|
|
|
+
|
|
|
+ // Then for GPUs load alternates and sort the list for consistent load ordering
|
|
|
+ if info.Library != "cpu" {
|
|
|
+ for a := range availableServers {
|
|
|
+ if info.Library == strings.Split(a, "_")[0] && a != requested {
|
|
|
+ alt = append(alt, a)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ slices.Sort(alt)
|
|
|
+ servers = append(servers, alt...)
|
|
|
+ }
|
|
|
+
|
|
|
+ if !(runtime.GOOS == "darwin" && runtime.GOARCH == "arm64") {
|
|
|
+ // Load up the best CPU variant if not primary requested
|
|
|
+ if info.Library != "cpu" {
|
|
|
+ variant := gpu.GetCPUCapability()
|
|
|
+ // If no variant, then we fall back to default
|
|
|
+ // If we have a variant, try that if we find an exact match
|
|
|
+ // Attempting to run the wrong CPU instructions will panic the
|
|
|
+ // process
|
|
|
+ if variant != gpu.CPUCapabilityNone {
|
|
|
+ for cmp := range availableServers {
|
|
|
+ if cmp == "cpu_"+variant.String() {
|
|
|
+ servers = append(servers, cmp)
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ servers = append(servers, "cpu")
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if len(servers) == 0 {
|
|
|
+ servers = []string{"cpu"}
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return servers
|
|
|
+}
|
|
|
+
|
|
|
+// Return the optimal server for this CPU architecture
|
|
|
+func ServerForCpu() string {
|
|
|
+ if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
|
|
|
+ return "metal"
|
|
|
+ }
|
|
|
+ variant := gpu.GetCPUCapability()
|
|
|
+ availableServers := GetAvailableServers(runnersDir)
|
|
|
+ if variant != gpu.CPUCapabilityNone {
|
|
|
+ for cmp := range availableServers {
|
|
|
+ if cmp == "cpu_"+variant.String() {
|
|
|
+ return cmp
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return "cpu"
|
|
|
+}
|