123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206 |
- package runners
- import (
- "log/slog"
- "os"
- "path/filepath"
- "runtime"
- "slices"
- "strings"
- "sync"
- "golang.org/x/sys/cpu"
- "github.com/ollama/ollama/envconfig"
- )
// Package-level discovery state, filled in lazily by locateRunnersOnce.
var (
	// runnersDir is the directory in which dynamic runners were found; it is
	// empty when only the built-in runner is present.
	runnersDir string
	// once ensures locateRunnersOnce runs a single time.
	once sync.Once
)
// CPUCapability identifies the CPU vector extension level used when
// choosing between runner variants.
type CPUCapability uint32

// Override at build time when building base GPU runners
// var GPURunnerCPUCapability = CPUCapabilityAVX

// Known capability levels. CPUCapabilityNone is the zero value.
const (
	CPUCapabilityNone CPUCapability = iota
	CPUCapabilityAVX
	CPUCapabilityAVX2
	// TODO AVX512
)

// String returns the lowercase variant suffix for c ("avx" or "avx2").
// Any other value, including CPUCapabilityNone, yields
// "no vector extensions".
func (c CPUCapability) String() string {
	if c == CPUCapabilityAVX2 {
		return "avx2"
	}
	if c == CPUCapabilityAVX {
		return "avx"
	}
	return "no vector extensions"
}
- func GetCPUCapability() CPUCapability {
- if cpu.X86.HasAVX2 {
- return CPUCapabilityAVX2
- }
- if cpu.X86.HasAVX {
- return CPUCapabilityAVX
- }
- // else LCD
- return CPUCapabilityNone
- }
- // Return the location where runners were located
- // empty string indicates only builtin is present
- func Locate() string {
- once.Do(locateRunnersOnce)
- return runnersDir
- }
- // searches for runners in a prioritized set of locations
- // 1. local build, with executable at the top of the tree
- // 2. lib directory relative to executable
- func locateRunnersOnce() {
- exe, err := os.Executable()
- if err != nil {
- slog.Debug("runner locate", "error", err)
- }
- paths := []string{
- filepath.Join(filepath.Dir(exe), "llama", "build", runtime.GOOS+"-"+runtime.GOARCH, "runners"),
- filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama", "runners"),
- }
- for _, path := range paths {
- if _, err := os.Stat(path); err == nil {
- runnersDir = path
- slog.Debug("runners located", "dir", runnersDir)
- return
- }
- }
- // Fall back to built-in
- slog.Debug("no dynamic runners detected, using only built-in")
- runnersDir = ""
- }
// BuiltinName returns the well-known name of the built-in runner for the
// current platform: "metal" on darwin/arm64 and "cpu" everywhere else.
func BuiltinName() string {
	switch {
	case runtime.GOOS == "darwin" && runtime.GOARCH == "arm64":
		return "metal"
	default:
		return "cpu"
	}
}
- // directory names are the name of the runner and may contain an optional
- // variant prefixed with '_' as the separator. For example, "cuda_v11" and
- // "cuda_v12" or "cpu" and "cpu_avx2". Any library without a variant is the
- // lowest common denominator
- func GetAvailableServers() map[string]string {
- once.Do(locateRunnersOnce)
- servers := make(map[string]string)
- exe, err := os.Executable()
- if err == nil {
- servers[BuiltinName()] = exe
- }
- if runnersDir == "" {
- return servers
- }
- // glob runnersDir for files that start with ollama_
- pattern := filepath.Join(runnersDir, "*", "ollama_*")
- files, err := filepath.Glob(pattern)
- if err != nil {
- slog.Debug("could not glob", "pattern", pattern, "error", err)
- return nil
- }
- for _, file := range files {
- slog.Debug("availableServers : found", "file", file)
- runnerName := filepath.Base(filepath.Dir(file))
- // Special case for our GPU runners - if compiled with standard AVX flag
- // detect incompatible system
- // Custom builds will omit this and its up to the user to ensure compatibility
- parsed := strings.Split(runnerName, "_")
- if len(parsed) == 3 && parsed[2] == "avx" && !cpu.X86.HasAVX {
- slog.Info("GPU runner incompatible with host system, CPU does not have AVX", "runner", runnerName)
- continue
- }
- servers[runnerName] = file
- }
- return servers
- }
- // serversForGpu returns a list of compatible servers give the provided GPU library/variant
- func ServersForGpu(requested string) []string {
- // glob workDir for files that start with ollama_
- availableServers := GetAvailableServers()
- // Short circuit if the only option is built-in
- if _, ok := availableServers[BuiltinName()]; ok && len(availableServers) == 1 {
- return []string{BuiltinName()}
- }
- bestCPUVariant := GetCPUCapability()
- requestedLib := strings.Split(requested, "_")[0]
- servers := []string{}
- // exact match first
- for a := range availableServers {
- short := a
- parsed := strings.Split(a, "_")
- if len(parsed) == 3 {
- // Strip off optional _avx for comparison
- short = parsed[0] + "_" + parsed[1]
- }
- if a == requested || short == requested {
- servers = []string{a}
- }
- }
- // If no exact match, then try without variant
- if len(servers) == 0 {
- alt := []string{}
- for a := range availableServers {
- if requestedLib == strings.Split(a, "_")[0] && a != requested {
- alt = append(alt, a)
- }
- }
- slices.Sort(alt)
- servers = append(servers, alt...)
- }
- // Finally append the best CPU option if found, then builtin
- if bestCPUVariant != CPUCapabilityNone {
- for cmp := range availableServers {
- if cmp == "cpu_"+bestCPUVariant.String() {
- servers = append(servers, cmp)
- break
- }
- }
- }
- servers = append(servers, BuiltinName())
- return servers
- }
- // Return the optimal server for this CPU architecture
- func ServerForCpu() string {
- if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
- return BuiltinName()
- }
- variant := GetCPUCapability()
- availableServers := GetAvailableServers()
- if variant != CPUCapabilityNone {
- for cmp := range availableServers {
- if cmp == "cpu_"+variant.String() {
- return cmp
- }
- }
- }
- return BuiltinName()
- }
|