123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384 |
- package runners
- import (
- "compress/gzip"
- "errors"
- "fmt"
- "io"
- "io/fs"
- "log/slog"
- "os"
- "path/filepath"
- "runtime"
- "slices"
- "strconv"
- "strings"
- "sync"
- "syscall"
- "golang.org/x/sync/errgroup"
- "github.com/ollama/ollama/envconfig"
- "github.com/ollama/ollama/gpu"
- )
// binGlob matches the embedded payload files, which are laid out as
// $OS/$GOARCH/$RUNNER/$FILE.
const binGlob = "*/*/*/*"

var (
	// lock is held by Refresh and Cleanup while they read or update runnersDir.
	lock sync.Mutex

	// runnersDir is the directory the runners live in; it stays empty until
	// Refresh has extracted or located them.
	runnersDir string
)
- // Return the location where runners are stored
- // If runners are payloads, this will either extract them
- // or refresh them if any have disappeared due to tmp cleaners
- func Refresh(payloadFS fs.FS) (string, error) {
- lock.Lock()
- defer lock.Unlock()
- var err error
- // Wire up extra logging on our first load
- if runnersDir == "" {
- defer func() {
- var runners []string
- for v := range GetAvailableServers(runnersDir) {
- runners = append(runners, v)
- }
- slog.Info("Dynamic LLM libraries", "runners", runners)
- slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY")
- }()
- }
- if hasPayloads(payloadFS) {
- if runnersDir == "" {
- runnersDir, err = extractRunners(payloadFS)
- } else {
- err = refreshRunners(payloadFS, runnersDir)
- }
- } else if runnersDir == "" {
- runnersDir, err = locateRunners()
- }
- return runnersDir, err
- }
- func Cleanup(payloadFS fs.FS) {
- lock.Lock()
- defer lock.Unlock()
- if hasPayloads(payloadFS) && runnersDir != "" {
- // We want to fully clean up the tmpdir parent of the payloads dir
- tmpDir := filepath.Clean(filepath.Join(runnersDir, ".."))
- slog.Debug("cleaning up", "dir", tmpDir)
- err := os.RemoveAll(tmpDir)
- if err != nil {
- slog.Warn("failed to clean up", "dir", tmpDir, "err", err)
- }
- }
- }
- func locateRunners() (string, error) {
- exe, err := os.Executable()
- if err != nil {
- return "", err
- }
- cwd, err := os.Getwd()
- if err != nil {
- return "", err
- }
- var paths []string
- for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe()), cwd} {
- paths = append(paths,
- root,
- filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH),
- filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH),
- )
- }
- // Try a few variations to improve developer experience when building from source in the local tree
- for _, path := range paths {
- candidate := filepath.Join(path, "lib", "ollama", "runners")
- if _, err := os.Stat(candidate); err == nil {
- return candidate, nil
- }
- }
- return "", fmt.Errorf("unable to locate runners in any search path %v", paths)
- }
- // Return true if we're carying nested payloads for the runners
- func hasPayloads(payloadFS fs.FS) bool {
- files, err := fs.Glob(payloadFS, binGlob)
- if err != nil || len(files) == 0 || (len(files) == 1 && strings.Contains(files[0], "placeholder")) {
- return false
- }
- return true
- }
- func extractRunners(payloadFS fs.FS) (string, error) {
- cleanupTmpDirs()
- tmpDir, err := os.MkdirTemp(envconfig.TempDir(), "ollama")
- if err != nil {
- return "", fmt.Errorf("failed to generate tmp dir: %w", err)
- }
- // Track our pid so we can clean up orphaned tmpdirs
- n := filepath.Join(tmpDir, "ollama.pid")
- if err := os.WriteFile(n, []byte(strconv.Itoa(os.Getpid())), 0o644); err != nil {
- slog.Warn("failed to write pid file", "file", n, "error", err)
- }
- // We create a distinct subdirectory for payloads within the tmpdir
- // This will typically look like /tmp/ollama3208993108/runners on linux
- rDir := filepath.Join(tmpDir, "runners")
- slog.Info("extracting embedded files", "dir", rDir)
- return rDir, refreshRunners(payloadFS, rDir)
- }
- func refreshRunners(payloadFS fs.FS, rDir string) error {
- // extract or refresh server libraries
- err := extractFiles(payloadFS, rDir, binGlob)
- if err != nil {
- return fmt.Errorf("extract binaries: %v", err)
- }
- return nil
- }
- // extract extracts the embedded files to the target directory
- func extractFiles(payloadFS fs.FS, targetDir string, glob string) error {
- files, err := fs.Glob(payloadFS, glob)
- if err != nil || len(files) == 0 {
- // Should not happen
- return fmt.Errorf("extractFiles called without payload present")
- }
- if err := os.MkdirAll(targetDir, 0o755); err != nil {
- return fmt.Errorf("extractFiles could not mkdir %s: %v", targetDir, err)
- }
- g := new(errgroup.Group)
- // $OS/$GOARCH/$RUNNER/$FILE
- for _, file := range files {
- filename := file
- runner := filepath.Base(filepath.Dir(filename))
- slog.Debug("extracting", "runner", runner, "payload", filename)
- g.Go(func() error {
- srcf, err := payloadFS.Open(filename)
- if err != nil {
- return err
- }
- defer srcf.Close()
- src := io.Reader(srcf)
- if strings.HasSuffix(filename, ".gz") {
- src, err = gzip.NewReader(src)
- if err != nil {
- return fmt.Errorf("decompress payload %s: %v", filename, err)
- }
- filename = strings.TrimSuffix(filename, ".gz")
- }
- runnerDir := filepath.Join(targetDir, runner)
- if err := os.MkdirAll(runnerDir, 0o755); err != nil {
- return fmt.Errorf("extractFiles could not mkdir %s: %v", runnerDir, err)
- }
- base := filepath.Base(filename)
- destFilename := filepath.Join(runnerDir, base)
- _, err = os.Stat(destFilename)
- switch {
- case errors.Is(err, os.ErrNotExist):
- destFile, err := os.OpenFile(destFilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
- if err != nil {
- return fmt.Errorf("write payload %s: %v", filename, err)
- }
- defer destFile.Close()
- if _, err := io.Copy(destFile, src); err != nil {
- return fmt.Errorf("copy payload %s: %v", filename, err)
- }
- case err != nil:
- return fmt.Errorf("stat payload %s: %v", filename, err)
- }
- return nil
- })
- }
- err = g.Wait()
- if err != nil {
- slog.Error("failed to extract files", "error", err)
- // If we fail to extract, the payload dir is most likely unusable, so cleanup whatever we extracted
- err := os.RemoveAll(targetDir)
- if err != nil {
- slog.Warn("failed to cleanup incomplete payload dir", "dir", targetDir, "error", err)
- }
- return err
- }
- return nil
- }
- // Best effort to clean up prior tmpdirs
- func cleanupTmpDirs() {
- tmpDir := envconfig.TempDir()
- if tmpDir == "" {
- tmpDir = os.TempDir()
- }
- matches, err := filepath.Glob(filepath.Join(tmpDir, "ollama*", "ollama.pid"))
- if err != nil {
- return
- }
- for _, match := range matches {
- raw, err := os.ReadFile(match)
- if errors.Is(err, os.ErrNotExist) {
- slog.Debug("not a ollama runtime directory, skipping", "path", match)
- continue
- } else if err != nil {
- slog.Warn("could not read ollama.pid, skipping", "path", match, "error", err)
- continue
- }
- pid, err := strconv.Atoi(string(raw))
- if err != nil {
- slog.Warn("invalid pid, skipping", "path", match, "error", err)
- continue
- }
- p, err := os.FindProcess(pid)
- if err == nil && !errors.Is(p.Signal(syscall.Signal(0)), os.ErrProcessDone) {
- slog.Warn("process still running, skipping", "pid", pid, "path", match)
- continue
- }
- if err := os.Remove(match); err != nil {
- slog.Warn("could not cleanup stale pidfile", "path", match, "error", err)
- }
- runners := filepath.Join(filepath.Dir(match), "runners")
- if err := os.RemoveAll(runners); err != nil {
- slog.Warn("could not cleanup stale runners", "path", runners, "error", err)
- }
- if err := os.Remove(filepath.Dir(match)); err != nil {
- slog.Warn("could not cleanup stale tmpdir", "path", filepath.Dir(match), "error", err)
- }
- }
- }
// GetAvailableServers maps each runner name found under payloadsDir to the
// directory that holds it. A directory counts as a runner when it contains at
// least one file whose name starts with "ollama_".
//
// Directory names are the name of the runner and may contain an optional
// variant prefixed with '_' as the separator. For example, "cuda_v11" and
// "cuda_v12" or "cpu" and "cpu_avx2". Any library without a variant is the
// lowest common denominator.
//
// It returns nil when payloadsDir is empty or the glob fails.
func GetAvailableServers(payloadsDir string) map[string]string {
	if payloadsDir == "" {
		slog.Error("empty runner dir")
		return nil
	}

	// glob payloadsDir for files that start with ollama_
	pattern := filepath.Join(payloadsDir, "*", "ollama_*")
	matches, globErr := filepath.Glob(pattern)
	if globErr != nil {
		slog.Debug("could not glob", "pattern", pattern, "error", globErr)
		return nil
	}

	servers := make(map[string]string)
	for _, match := range matches {
		slog.Debug("availableServers : found", "file", match)
		runnerDir := filepath.Dir(match)
		servers[filepath.Base(runnerDir)] = runnerDir
	}
	return servers
}
- // serversForGpu returns a list of compatible servers give the provided GPU
- // info, ordered by performance. assumes Init() has been called
- // TODO - switch to metadata based mapping
- func ServersForGpu(info gpu.GpuInfo) []string {
- // glob workDir for files that start with ollama_
- availableServers := GetAvailableServers(runnersDir)
- requested := info.Library
- if info.Variant != gpu.CPUCapabilityNone.String() {
- requested += "_" + info.Variant
- }
- servers := []string{}
- // exact match first
- for a := range availableServers {
- if a == requested {
- servers = []string{a}
- if a == "metal" {
- return servers
- }
- break
- }
- }
- alt := []string{}
- // Then for GPUs load alternates and sort the list for consistent load ordering
- if info.Library != "cpu" {
- for a := range availableServers {
- if info.Library == strings.Split(a, "_")[0] && a != requested {
- alt = append(alt, a)
- }
- }
- slices.Sort(alt)
- servers = append(servers, alt...)
- }
- if !(runtime.GOOS == "darwin" && runtime.GOARCH == "arm64") {
- // Load up the best CPU variant if not primary requested
- if info.Library != "cpu" {
- variant := gpu.GetCPUCapability()
- // If no variant, then we fall back to default
- // If we have a variant, try that if we find an exact match
- // Attempting to run the wrong CPU instructions will panic the
- // process
- if variant != gpu.CPUCapabilityNone {
- for cmp := range availableServers {
- if cmp == "cpu_"+variant.String() {
- servers = append(servers, cmp)
- break
- }
- }
- } else {
- servers = append(servers, "cpu")
- }
- }
- if len(servers) == 0 {
- servers = []string{"cpu"}
- }
- }
- return servers
- }
- // Return the optimal server for this CPU architecture
- func ServerForCpu() string {
- if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
- return "metal"
- }
- variant := gpu.GetCPUCapability()
- availableServers := GetAvailableServers(runnersDir)
- if variant != gpu.CPUCapabilityNone {
- for cmp := range availableServers {
- if cmp == "cpu_"+variant.String() {
- return cmp
- }
- }
- }
- return "cpu"
- }
|