Michael Yang 4 months ago
parent
commit
67bcb55941
4 changed files with 142 additions and 369 deletions
  1. discover/gpu.go (+8, -13)
  2. llm/server.go (+134, -187)
  3. runners/common.go (+0, -160)
  4. server/routes.go (+0, -9)

+ 8 - 13
discover/gpu.go

@@ -719,23 +719,18 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
 func LibraryDirs() []string {
 	// dependencies can exist wherever we found the runners (e.g. build tree for developers) and relative to the executable
 	// This can be simplified once we no longer carry runners as payloads
-	paths := []string{}
-	appExe, err := os.Executable()
+	exe, err := os.Executable()
 	if err != nil {
 		slog.Warn("failed to lookup executable path", "error", err)
-	} else {
-		appRelative := filepath.Join(filepath.Dir(appExe), envconfig.LibRelativeToExe(), "lib", "ollama")
-		if _, err := os.Stat(appRelative); err == nil {
-			paths = append(paths, appRelative)
-		}
+		return nil
 	}
-	rDir := runners.Locate()
-	if err != nil {
-		slog.Warn("unable to locate gpu dependency libraries", "error", err)
-	} else {
-		paths = append(paths, filepath.Dir(rDir))
+
+	lib := filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama")
+	if _, err := os.Stat(lib); err != nil {
+		return nil
 	}
-	return paths
+
+	return []string{lib}
 }
 
 func GetSystemInfo() SystemInfo {
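
For orientation, here is a minimal, self-contained sketch (not part of this commit; the helper name libDirNextToExe and the relative path are illustrative) of the lookup pattern the simplified LibraryDirs uses: resolve a directory relative to the running executable and return it only if it exists on disk.

package main

import (
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
)

// libDirNextToExe resolves relDir against the executable's directory and
// returns it only when the directory is actually present.
func libDirNextToExe(relDir string) (string, bool) {
	exe, err := os.Executable()
	if err != nil {
		slog.Warn("failed to lookup executable path", "error", err)
		return "", false
	}
	dir := filepath.Join(filepath.Dir(exe), relDir)
	if _, err := os.Stat(dir); err != nil {
		return "", false
	}
	return dir, true
}

func main() {
	// Hypothetical layout: bundled libraries one level up from the binary.
	if dir, ok := libDirNextToExe(filepath.Join("..", "lib", "ollama")); ok {
		fmt.Println("bundled libraries at:", dir)
	}
}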

+ 134 - 187
llm/server.go

@@ -30,7 +30,6 @@ import (
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/fs/ggml"
 	"github.com/ollama/ollama/llama"
-	"github.com/ollama/ollama/runners"
 )
 
 type LlamaServer interface {
@@ -91,25 +90,19 @@ func LoadModel(model string, maxArraySize int) (*ggml.GGML, error) {
 // NewLlamaServer will run a server for the given GPUs
 // The gpu list must be a single family.
 func NewLlamaServer(gpus discover.GpuInfoList, model string, f *ggml.GGML, adapters, projectors []string, opts api.Options, numParallel int) (LlamaServer, error) {
-	var err error
-	var cpuRunner string
-	var estimate MemoryEstimate
-	var systemTotalMemory uint64
-	var systemFreeMemory uint64
-	var systemSwapFreeMemory uint64
-
 	systemInfo := discover.GetSystemInfo()
-	systemTotalMemory = systemInfo.System.TotalMemory
-	systemFreeMemory = systemInfo.System.FreeMemory
-	systemSwapFreeMemory = systemInfo.System.FreeSwap
+	systemTotalMemory := systemInfo.System.TotalMemory
+	systemFreeMemory := systemInfo.System.FreeMemory
+	systemSwapFreeMemory := systemInfo.System.FreeSwap
 	slog.Info("system memory", "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "free_swap", format.HumanBytes2(systemSwapFreeMemory))
 
 	// If the user wants zero GPU layers, reset the gpu list to be CPU/system ram info
 	if opts.NumGPU == 0 {
 		gpus = discover.GetCPUInfo()
 	}
+
+	var estimate MemoryEstimate
 	if len(gpus) == 1 && gpus[0].Library == "cpu" {
-		cpuRunner = runners.ServerForCpu()
 		estimate = EstimateGPULayers(gpus, f, projectors, opts)
 	} else {
 		estimate = EstimateGPULayers(gpus, f, projectors, opts)
@@ -121,7 +114,6 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, f *ggml.GGML, adapt
 			opts.NumGPU = 0
 		case gpus[0].Library != "metal" && estimate.Layers == 0:
 			// Don't bother loading into the GPU if no layers can fit
-			cpuRunner = runners.ServerForCpu()
 			gpus = discover.GetCPUInfo()
 		case opts.NumGPU < 0 && estimate.Layers > 0 && gpus[0].Library != "cpu":
 			opts.NumGPU = estimate.Layers
@@ -141,36 +133,6 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, f *ggml.GGML, adapt
 
 	slog.Info("offload", "", estimate)
 
-	// Loop through potential servers
-	finalErr := errors.New("no suitable llama servers found")
-
-	availableServers := runners.GetAvailableServers()
-
-	var servers []string
-	if cpuRunner != "" {
-		servers = []string{cpuRunner}
-	} else {
-		servers = runners.ServersForGpu(gpus[0].RunnerName()) // All GPUs in the list are matching Library and Variant
-	}
-	demandLib := envconfig.LLMLibrary()
-	if demandLib != "" {
-		serverPath := availableServers[demandLib]
-		if serverPath == "" {
-			slog.Info(fmt.Sprintf("Invalid OLLAMA_LLM_LIBRARY %s - not found", demandLib))
-		} else {
-			slog.Info("user override", "OLLAMA_LLM_LIBRARY", demandLib, "path", serverPath)
-			servers = []string{demandLib}
-			if strings.HasPrefix(demandLib, "cpu") || (!(runtime.GOOS == "darwin" && runtime.GOARCH == "arm64") && demandLib == runners.BuiltinName()) {
-				// Omit the GPU flag to silence the warning
-				opts.NumGPU = -1
-			}
-		}
-	}
-
-	if len(servers) == 0 {
-		return nil, fmt.Errorf("no servers found for %v", gpus)
-	}
-
 	params := []string{
 		"--model", model,
 		"--ctx-size", strconv.Itoa(opts.NumCtx),
@@ -271,164 +233,149 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, f *ggml.GGML, adapt
 		params = append(params, "--multiuser-cache")
 	}
 
-	for i := range servers {
-		builtin := servers[i] == runners.BuiltinName()
-		server := availableServers[servers[i]]
-		if server == "" {
-			// Shouldn't happen
-			finalErr = fmt.Errorf("[%d] server %s not listed in available servers %v", i, servers[i], availableServers)
-			slog.Error("server list inconsistent", "error", finalErr)
-			continue
-		}
+	exe, err := os.Executable()
+	if err != nil {
+		return nil, err
+	}
 
-		if strings.HasPrefix(servers[i], "cpu") || (builtin && !(runtime.GOOS == "darwin" && runtime.GOARCH == "arm64")) {
-			gpus = discover.GetCPUInfo()
+	// Find an available port for the runner to listen on
+	port := 0
+	if a, err := net.ResolveTCPAddr("tcp", "localhost:0"); err == nil {
+		var l *net.TCPListener
+		if l, err = net.ListenTCP("tcp", a); err == nil {
+			port = l.Addr().(*net.TCPAddr).Port
+			l.Close()
 		}
-
-		// Find an availableServers  port, retry on each iteration in case the failure was a port conflict race
-		port := 0
-		if a, err := net.ResolveTCPAddr("tcp", "localhost:0"); err == nil {
-			var l *net.TCPListener
-			if l, err = net.ListenTCP("tcp", a); err == nil {
-				port = l.Addr().(*net.TCPAddr).Port
-				l.Close()
+	}
+	if port == 0 {
+		slog.Debug("ResolveTCPAddr failed ", "error", err)
+		port = rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range
+	}
+	finalParams := []string{"runner"}
+	finalParams = append(finalParams, params...)
+	finalParams = append(finalParams, "--port", strconv.Itoa(port))
+
+	pathEnv := "LD_LIBRARY_PATH"
+	if runtime.GOOS == "windows" {
+		pathEnv = "PATH"
+	}
+	// Start with the server directory for the LD_LIBRARY_PATH/PATH
+	libraryPaths := []string{filepath.Dir(exe)}
+
+	if libraryPath, ok := os.LookupEnv(pathEnv); ok {
+		// favor our bundled library dependencies over system libraries
+		libraryPaths = append(libraryPaths, filepath.SplitList(libraryPath)...)
+	}
+
+	// Note: we always put the dependency path first
+	// since this was the exact version we compiled/linked against
+	if gpus[0].DependencyPath != nil {
+		// assume gpus from the same library have the same dependency path
+		libraryPaths = append(gpus[0].DependencyPath, libraryPaths...)
+	}
+
+	// TODO - once fully switched to the Go runner, load the model here for tokenize/detokenize cgo access
+	s := &llmServer{
+		port:        port,
+		cmd:         exec.Command(exe, finalParams...),
+		status:      NewStatusWriter(os.Stderr),
+		options:     opts,
+		modelPath:   model,
+		estimate:    estimate,
+		numParallel: numParallel,
+		sem:         semaphore.NewWeighted(int64(numParallel)),
+		totalLayers: f.KV().BlockCount() + 1,
+		gpus:        gpus,
+		done:        make(chan error, 1),
+	}
+
+	s.cmd.Env = os.Environ()
+	s.cmd.Stdout = os.Stdout
+	s.cmd.Stderr = s.status
+	s.cmd.SysProcAttr = LlamaServerSysProcAttr
+
+	envWorkarounds := [][2]string{}
+	for _, gpu := range gpus {
+		envWorkarounds = append(envWorkarounds, gpu.EnvWorkarounds...)
+	}
+	visibleDevicesEnv, visibleDevicesEnvVal := gpus.GetVisibleDevicesEnv()
+	pathEnvVal := strings.Join(libraryPaths, string(filepath.ListSeparator))
+
+	// Update or add the path and visible devices variable with our adjusted version
+	pathNeeded := true
+	devicesNeeded := visibleDevicesEnv != ""
+	for i := range s.cmd.Env {
+		cmp := strings.SplitN(s.cmd.Env[i], "=", 2)
+		if strings.EqualFold(cmp[0], pathEnv) {
+			s.cmd.Env[i] = pathEnv + "=" + pathEnvVal
+			pathNeeded = false
+		} else if devicesNeeded && strings.EqualFold(cmp[0], visibleDevicesEnv) {
+			s.cmd.Env[i] = visibleDevicesEnv + "=" + visibleDevicesEnvVal
+			devicesNeeded = false
+		} else if len(envWorkarounds) != 0 {
+			for _, kv := range envWorkarounds {
+				if strings.EqualFold(cmp[0], kv[0]) {
+					s.cmd.Env[i] = kv[0] + "=" + kv[1]
+				}
 			}
 		}
-		if port == 0 {
-			slog.Debug("ResolveTCPAddr failed ", "error", err)
-			port = rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range
-		}
-		finalParams := []string{"runner"}
-		finalParams = append(finalParams, params...)
-		finalParams = append(finalParams, "--port", strconv.Itoa(port))
-
-		pathEnv := "LD_LIBRARY_PATH"
-		if runtime.GOOS == "windows" {
-			pathEnv = "PATH"
-		}
-		// Start with the server directory for the LD_LIBRARY_PATH/PATH
-		libraryPaths := []string{filepath.Dir(server)}
-
-		if libraryPath, ok := os.LookupEnv(pathEnv); ok {
-			// favor our bundled library dependencies over system libraries
-			libraryPaths = append(libraryPaths, filepath.SplitList(libraryPath)...)
-		}
-
-		// Note: we always put the dependency path first
-		// since this was the exact version we compiled/linked against
-		if gpus[0].DependencyPath != nil {
-			// assume gpus from the same library have the same dependency path
-			libraryPaths = append(gpus[0].DependencyPath, libraryPaths...)
-		}
-
-		// TODO - once fully switched to the Go runner, load the model here for tokenize/detokenize cgo access
-		s := &llmServer{
-			port:        port,
-			cmd:         exec.Command(server, finalParams...),
-			status:      NewStatusWriter(os.Stderr),
-			options:     opts,
-			modelPath:   model,
-			estimate:    estimate,
-			numParallel: numParallel,
-			sem:         semaphore.NewWeighted(int64(numParallel)),
-			totalLayers: f.KV().BlockCount() + 1,
-			gpus:        gpus,
-			done:        make(chan error, 1),
-		}
-
-		s.cmd.Env = os.Environ()
-		s.cmd.Stdout = os.Stdout
-		s.cmd.Stderr = s.status
-		s.cmd.SysProcAttr = LlamaServerSysProcAttr
+	}
+	if pathNeeded {
+		s.cmd.Env = append(s.cmd.Env, pathEnv+"="+pathEnvVal)
+	}
+	if devicesNeeded {
+		s.cmd.Env = append(s.cmd.Env, visibleDevicesEnv+"="+visibleDevicesEnvVal)
+	}
 
-		envWorkarounds := [][2]string{}
-		for _, gpu := range gpus {
-			envWorkarounds = append(envWorkarounds, gpu.EnvWorkarounds...)
-		}
-		visibleDevicesEnv, visibleDevicesEnvVal := gpus.GetVisibleDevicesEnv()
-		pathEnvVal := strings.Join(libraryPaths, string(filepath.ListSeparator))
-
-		// Update or add the path and visible devices variable with our adjusted version
-		pathNeeded := true
-		devicesNeeded := visibleDevicesEnv != ""
-		for i := range s.cmd.Env {
-			cmp := strings.SplitN(s.cmd.Env[i], "=", 2)
-			if strings.EqualFold(cmp[0], pathEnv) {
-				s.cmd.Env[i] = pathEnv + "=" + pathEnvVal
-				pathNeeded = false
-			} else if devicesNeeded && strings.EqualFold(cmp[0], visibleDevicesEnv) {
-				s.cmd.Env[i] = visibleDevicesEnv + "=" + visibleDevicesEnvVal
-				devicesNeeded = false
-			} else if len(envWorkarounds) != 0 {
-				for _, kv := range envWorkarounds {
-					if strings.EqualFold(cmp[0], kv[0]) {
-						s.cmd.Env[i] = kv[0] + "=" + kv[1]
-					}
-				}
+	slog.Info("starting llama server", "cmd", s.cmd.String())
+	if envconfig.Debug() {
+		filteredEnv := []string{}
+		for _, ev := range s.cmd.Env {
+			if strings.HasPrefix(ev, "CUDA_") ||
+				strings.HasPrefix(ev, "ROCR_") ||
+				strings.HasPrefix(ev, "ROCM_") ||
+				strings.HasPrefix(ev, "HIP_") ||
+				strings.HasPrefix(ev, "GPU_") ||
+				strings.HasPrefix(ev, "HSA_") ||
+				strings.HasPrefix(ev, "GGML_") ||
+				strings.HasPrefix(ev, "PATH=") ||
+				strings.HasPrefix(ev, "LD_LIBRARY_PATH=") {
+				filteredEnv = append(filteredEnv, ev)
 			}
 		}
-		if pathNeeded {
-			s.cmd.Env = append(s.cmd.Env, pathEnv+"="+pathEnvVal)
-		}
-		if devicesNeeded {
-			s.cmd.Env = append(s.cmd.Env, visibleDevicesEnv+"="+visibleDevicesEnvVal)
-		}
+		// Log at debug as the environment is inherited and might contain sensitive information
+		slog.Debug("subprocess", "environment", filteredEnv)
+	}
 
-		slog.Info("starting llama server", "cmd", s.cmd.String())
-		if envconfig.Debug() {
-			filteredEnv := []string{}
-			for _, ev := range s.cmd.Env {
-				if strings.HasPrefix(ev, "CUDA_") ||
-					strings.HasPrefix(ev, "ROCR_") ||
-					strings.HasPrefix(ev, "ROCM_") ||
-					strings.HasPrefix(ev, "HIP_") ||
-					strings.HasPrefix(ev, "GPU_") ||
-					strings.HasPrefix(ev, "HSA_") ||
-					strings.HasPrefix(ev, "GGML_") ||
-					strings.HasPrefix(ev, "PATH=") ||
-					strings.HasPrefix(ev, "LD_LIBRARY_PATH=") {
-					filteredEnv = append(filteredEnv, ev)
-				}
-			}
-			// Log at debug as the environment is inherited and might contain sensitive information
-			slog.Debug("subprocess", "environment", filteredEnv)
+	if err = s.cmd.Start(); err != nil {
+		// Detect permission denied and augment the message about noexec
+		if errors.Is(err, os.ErrPermission) {
+			return nil, fmt.Errorf("unable to start server %w.  %s may have noexec set.  Set OLLAMA_TMPDIR for server to a writable executable directory", err, exe)
 		}
 
-		if err = s.cmd.Start(); err != nil {
-			// Detect permission denied and augment the message about noexec
-			if errors.Is(err, os.ErrPermission) {
-				finalErr = fmt.Errorf("unable to start server %w.  %s may have noexec set.  Set OLLAMA_TMPDIR for server to a writable executable directory", err, server)
-				continue
-			}
-			msg := ""
-			if s.status != nil && s.status.LastErrMsg != "" {
-				msg = s.status.LastErrMsg
-			}
-			err = fmt.Errorf("error starting the external llama server: %v %s", err, msg)
-			finalErr = err
-			continue
+		msg := ""
+		if s.status != nil && s.status.LastErrMsg != "" {
+			msg = s.status.LastErrMsg
 		}
+		return nil, fmt.Errorf("error starting the external llama server: %v %s", err, msg)
+	}
 
-		// reap subprocess when it exits
-		go func() {
-			err := s.cmd.Wait()
-			// Favor a more detailed message over the process exit status
-			if err != nil && s.status != nil && s.status.LastErrMsg != "" {
-				slog.Debug("llama runner terminated", "error", err)
-				if strings.Contains(s.status.LastErrMsg, "unknown model") {
-					s.status.LastErrMsg = "this model is not supported by your version of Ollama. You may need to upgrade"
-				}
-				s.done <- errors.New(s.status.LastErrMsg)
-			} else {
-				s.done <- err
+	// reap subprocess when it exits
+	go func() {
+		err := s.cmd.Wait()
+		// Favor a more detailed message over the process exit status
+		if err != nil && s.status != nil && s.status.LastErrMsg != "" {
+			slog.Debug("llama runner terminated", "error", err)
+			if strings.Contains(s.status.LastErrMsg, "unknown model") {
+				s.status.LastErrMsg = "this model is not supported by your version of Ollama. You may need to upgrade"
 			}
-		}()
-
-		return s, nil
-	}
+			s.done <- errors.New(s.status.LastErrMsg)
+		} else {
+			s.done <- err
+		}
+	}()
 
-	slog.Error("unable to load any llama server", "error", finalErr)
-	return nil, finalErr
+	return s, nil
 }
 
 type ServerStatus int
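
As a side note, a standalone sketch of the port-selection fallback the server startup above relies on (the function name pickPort is illustrative, not part of the codebase): ask the OS for a free port on localhost:0 and, if that fails, pick a random port in the IANA ephemeral range.

package main

import (
	"fmt"
	"math/rand"
	"net"
)

// pickPort lets the OS choose a free TCP port; if resolution or listening
// fails, it falls back to a random port in the ephemeral range 49152-65535.
func pickPort() int {
	if addr, err := net.ResolveTCPAddr("tcp", "localhost:0"); err == nil {
		if l, err := net.ListenTCP("tcp", addr); err == nil {
			port := l.Addr().(*net.TCPAddr).Port
			l.Close()
			return port
		}
	}
	return rand.Intn(65535-49152) + 49152
}

func main() {
	fmt.Println("runner will listen on port:", pickPort())
}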

+ 0 - 160
runners/common.go

@@ -1,17 +1,9 @@
 package runners
 
 import (
-	"log/slog"
-	"os"
-	"path/filepath"
-	"runtime"
-	"slices"
-	"strings"
 	"sync"
 
 	"golang.org/x/sys/cpu"
-
-	"github.com/ollama/ollama/envconfig"
 )
 
 var (
@@ -52,155 +44,3 @@ func GetCPUCapability() CPUCapability {
 	// else LCD
 	return CPUCapabilityNone
 }
-
-// Return the location where runners were located
-// empty string indicates only builtin is present
-func Locate() string {
-	once.Do(locateRunnersOnce)
-	return runnersDir
-}
-
-// searches for runners in a prioritized set of locations
-// 1. local build, with executable at the top of the tree
-// 2. lib directory relative to executable
-func locateRunnersOnce() {
-	exe, err := os.Executable()
-	if err != nil {
-		slog.Debug("runner locate", "error", err)
-	}
-
-	paths := []string{
-		filepath.Join(filepath.Dir(exe), "llama", "build", runtime.GOOS+"-"+runtime.GOARCH, "runners"),
-		filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama", "runners"),
-	}
-	for _, path := range paths {
-		if _, err := os.Stat(path); err == nil {
-			runnersDir = path
-			slog.Debug("runners located", "dir", runnersDir)
-			return
-		}
-	}
-	// Fall back to built-in
-	slog.Debug("no dynamic runners detected, using only built-in")
-	runnersDir = ""
-}
-
-// Return the well-known name of the builtin runner for the given platform
-func BuiltinName() string {
-	if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
-		return "metal"
-	}
-	return "cpu"
-}
-
-// directory names are the name of the runner and may contain an optional
-// variant prefixed with '_' as the separator. For example, "cuda_v11" and
-// "cuda_v12" or "cpu" and "cpu_avx2". Any library without a variant is the
-// lowest common denominator
-func GetAvailableServers() map[string]string {
-	once.Do(locateRunnersOnce)
-
-	servers := make(map[string]string)
-	exe, err := os.Executable()
-	if err == nil {
-		servers[BuiltinName()] = exe
-	}
-
-	if runnersDir == "" {
-		return servers
-	}
-
-	// glob runnersDir for files that start with ollama_
-	pattern := filepath.Join(runnersDir, "*", "ollama_*")
-
-	files, err := filepath.Glob(pattern)
-	if err != nil {
-		slog.Debug("could not glob", "pattern", pattern, "error", err)
-		return nil
-	}
-
-	for _, file := range files {
-		slog.Debug("availableServers : found", "file", file)
-		runnerName := filepath.Base(filepath.Dir(file))
-		// Special case for our GPU runners - if compiled with standard AVX flag
-		// detect incompatible system
-		// Custom builds will omit this and its up to the user to ensure compatibility
-		parsed := strings.Split(runnerName, "_")
-		if len(parsed) == 3 && parsed[2] == "avx" && !cpu.X86.HasAVX {
-			slog.Info("GPU runner incompatible with host system, CPU does not have AVX", "runner", runnerName)
-			continue
-		}
-		servers[runnerName] = file
-	}
-
-	return servers
-}
-
-// serversForGpu returns a list of compatible servers give the provided GPU library/variant
-func ServersForGpu(requested string) []string {
-	// glob workDir for files that start with ollama_
-	availableServers := GetAvailableServers()
-
-	// Short circuit if the only option is built-in
-	if _, ok := availableServers[BuiltinName()]; ok && len(availableServers) == 1 {
-		return []string{BuiltinName()}
-	}
-
-	bestCPUVariant := GetCPUCapability()
-	requestedLib := strings.Split(requested, "_")[0]
-	servers := []string{}
-
-	// exact match first
-	for a := range availableServers {
-		short := a
-		parsed := strings.Split(a, "_")
-		if len(parsed) == 3 {
-			// Strip off optional _avx for comparison
-			short = parsed[0] + "_" + parsed[1]
-		}
-		if a == requested || short == requested {
-			servers = []string{a}
-		}
-	}
-
-	// If no exact match, then try without variant
-	if len(servers) == 0 {
-		alt := []string{}
-		for a := range availableServers {
-			if requestedLib == strings.Split(a, "_")[0] && a != requested {
-				alt = append(alt, a)
-			}
-		}
-		slices.Sort(alt)
-		servers = append(servers, alt...)
-	}
-
-	// Finally append the best CPU option if found, then builtin
-	if bestCPUVariant != CPUCapabilityNone {
-		for cmp := range availableServers {
-			if cmp == "cpu_"+bestCPUVariant.String() {
-				servers = append(servers, cmp)
-				break
-			}
-		}
-	}
-	servers = append(servers, BuiltinName())
-	return servers
-}
-
-// Return the optimal server for this CPU architecture
-func ServerForCpu() string {
-	if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
-		return BuiltinName()
-	}
-	variant := GetCPUCapability()
-	availableServers := GetAvailableServers()
-	if variant != CPUCapabilityNone {
-		for cmp := range availableServers {
-			if cmp == "cpu_"+variant.String() {
-				return cmp
-			}
-		}
-	}
-	return BuiltinName()
-}

+ 0 - 9
server/routes.go

@@ -33,7 +33,6 @@ import (
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/openai"
 	"github.com/ollama/ollama/parser"
-	"github.com/ollama/ollama/runners"
 	"github.com/ollama/ollama/server/imageproc"
 	"github.com/ollama/ollama/template"
 	"github.com/ollama/ollama/types/errtypes"
@@ -1269,14 +1268,6 @@ func Serve(ln net.Listener) error {
 		done()
 	}()
 
-	// Locate and log what runners are present at startup
-	var runnerNames []string
-	for v := range runners.GetAvailableServers() {
-		runnerNames = append(runnerNames, v)
-	}
-	slog.Info("Dynamic LLM libraries", "runners", runnerNames)
-	slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY")
-
 	s.sched.Run(schedCtx)
 
 	// At startup we retrieve GPU information so we can get log messages before loading a model