Michael Yang committed 10 months ago
commit e2c3f6b3e2
6 changed files with 85 additions and 84 deletions
  1. envconfig/config.go (+72 -71)
  2. gpu/amd_linux.go (+4 -4)
  3. gpu/amd_windows.go (+1 -1)
  4. gpu/assets.go (+3 -3)
  5. gpu/gpu.go (+4 -4)
  6. llm/server.go (+1 -1)

+ 72 - 71
envconfig/config.go

@@ -149,30 +149,77 @@ var (
 	IntelGPU = Bool("OLLAMA_INTEL_GPU")
 )
 
+func String(s string) func() string {
+	return func() string {
+		return getenv(s)
+	}
+}
+
+var (
+	LLMLibrary = String("OLLAMA_LLM_LIBRARY")
+	TmpDir     = String("OLLAMA_TMPDIR")
+
+	CudaVisibleDevices    = String("CUDA_VISIBLE_DEVICES")
+	HipVisibleDevices     = String("HIP_VISIBLE_DEVICES")
+	RocrVisibleDevices    = String("ROCR_VISIBLE_DEVICES")
+	GpuDeviceOrdinal      = String("GPU_DEVICE_ORDINAL")
+	HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
+)
+
+func RunnersDir() (p string) {
+	if p := getenv("OLLAMA_RUNNERS_DIR"); p != "" {
+		return p
+	}
+
+	if runtime.GOOS != "windows" {
+		return
+	}
+
+	defer func() {
+		if p == "" {
+			slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
+		}
+	}()
+
+	// On Windows we do not carry the payloads inside the main executable
+	exe, err := os.Executable()
+	if err != nil {
+		return
+	}
+
+	cwd, err := os.Getwd()
+	if err != nil {
+		return
+	}
+
+	var paths []string
+	for _, root := range []string{filepath.Dir(exe), cwd} {
+		paths = append(paths,
+			root,
+			filepath.Join(root, "windows-"+runtime.GOARCH),
+			filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
+		)
+	}
+
+	// Try a few variations to improve developer experience when building from source in the local tree
+	for _, path := range paths {
+		candidate := filepath.Join(path, "ollama_runners")
+		if _, err := os.Stat(candidate); err == nil {
+			p = candidate
+			break
+		}
+	}
+
+	return p
+}
+
 var (
-	// Set via OLLAMA_LLM_LIBRARY in the environment
-	LLMLibrary string
 	// Set via OLLAMA_MAX_LOADED_MODELS in the environment
 	MaxRunners int
 	// Set via OLLAMA_MAX_QUEUE in the environment
 	MaxQueuedRequests int
 	// Set via OLLAMA_NUM_PARALLEL in the environment
 	NumParallel int
-	// Set via OLLAMA_RUNNERS_DIR in the environment
-	RunnersDir string
-	// Set via OLLAMA_TMPDIR in the environment
-	TmpDir string
-
-	// Set via CUDA_VISIBLE_DEVICES in the environment
-	CudaVisibleDevices string
-	// Set via HIP_VISIBLE_DEVICES in the environment
-	HipVisibleDevices string
-	// Set via ROCR_VISIBLE_DEVICES in the environment
-	RocrVisibleDevices string
-	// Set via GPU_DEVICE_ORDINAL in the environment
-	GpuDeviceOrdinal string
-	// Set via HSA_OVERRIDE_GFX_VERSION in the environment
-	HsaOverrideGfxVersion string
 )
 
 type EnvVar struct {
@@ -187,7 +234,7 @@ func AsMap() map[string]EnvVar {
 		"OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"},
 		"OLLAMA_HOST":              {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
 		"OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
-		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary, "Set LLM library to bypass autodetection"},
+		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
 		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners, "Maximum number of loaded models per GPU"},
 		"OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueuedRequests, "Maximum number of queued requests"},
 		"OLLAMA_MODELS":            {"OLLAMA_MODELS", Models(), "The path to the models directory"},
@@ -195,16 +242,16 @@ func AsMap() map[string]EnvVar {
 		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
 		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel, "Maximum number of parallel requests"},
 		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
-		"OLLAMA_RUNNERS_DIR":       {"OLLAMA_RUNNERS_DIR", RunnersDir, "Location for runners"},
+		"OLLAMA_RUNNERS_DIR":       {"OLLAMA_RUNNERS_DIR", RunnersDir(), "Location for runners"},
 		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
-		"OLLAMA_TMPDIR":            {"OLLAMA_TMPDIR", TmpDir, "Location for temporary files"},
+		"OLLAMA_TMPDIR":            {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"},
 	}
 	if runtime.GOOS != "darwin" {
-		ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices, "Set which NVIDIA devices are visible"}
-		ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices, "Set which AMD devices are visible"}
-		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices, "Set which AMD devices are visible"}
-		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal, "Set which AMD devices are visible"}
-		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion, "Override the gfx used for all detected AMD GPUs"}
+		ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"}
+		ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible"}
+		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible"}
+		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible"}
+		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
 		ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"}
 	}
 	return ret
@@ -233,46 +280,6 @@ func init() {
 }
 
 func LoadConfig() {
-	RunnersDir = getenv("OLLAMA_RUNNERS_DIR")
-	if runtime.GOOS == "windows" && RunnersDir == "" {
-		// On Windows we do not carry the payloads inside the main executable
-		appExe, err := os.Executable()
-		if err != nil {
-			slog.Error("failed to lookup executable path", "error", err)
-		}
-
-		cwd, err := os.Getwd()
-		if err != nil {
-			slog.Error("failed to lookup working directory", "error", err)
-		}
-
-		var paths []string
-		for _, root := range []string{filepath.Dir(appExe), cwd} {
-			paths = append(paths,
-				root,
-				filepath.Join(root, "windows-"+runtime.GOARCH),
-				filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
-			)
-		}
-
-		// Try a few variations to improve developer experience when building from source in the local tree
-		for _, p := range paths {
-			candidate := filepath.Join(p, "ollama_runners")
-			_, err := os.Stat(candidate)
-			if err == nil {
-				RunnersDir = candidate
-				break
-			}
-		}
-		if RunnersDir == "" {
-			slog.Error("unable to locate llm runner directory.  Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
-		}
-	}
-
-	TmpDir = getenv("OLLAMA_TMPDIR")
-
-	LLMLibrary = getenv("OLLAMA_LLM_LIBRARY")
-
 	if onp := getenv("OLLAMA_NUM_PARALLEL"); onp != "" {
 		val, err := strconv.Atoi(onp)
 		if err != nil {
@@ -300,10 +307,4 @@ func LoadConfig() {
 			MaxQueuedRequests = p
 		}
 	}
-
-	CudaVisibleDevices = getenv("CUDA_VISIBLE_DEVICES")
-	HipVisibleDevices = getenv("HIP_VISIBLE_DEVICES")
-	RocrVisibleDevices = getenv("ROCR_VISIBLE_DEVICES")
-	GpuDeviceOrdinal = getenv("GPU_DEVICE_ORDINAL")
-	HsaOverrideGfxVersion = getenv("HSA_OVERRIDE_GFX_VERSION")
 }
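
Note on behavior: after this refactor each value is read from the environment at call time instead of being cached once in LoadConfig, so changes to the environment after startup are observed by later calls. A minimal standalone sketch of the closure pattern (using os.Getenv directly; the commit's internal getenv helper may normalize values differently):

package main

import (
	"fmt"
	"os"
)

// String returns a getter that re-reads the named environment
// variable on every call, mirroring the factory added above.
func String(key string) func() string {
	return func() string {
		return os.Getenv(key)
	}
}

var TmpDir = String("OLLAMA_TMPDIR")

func main() {
	os.Setenv("OLLAMA_TMPDIR", "/tmp/a")
	fmt.Println(TmpDir()) // /tmp/a

	// A cached package variable would still report the old value here.
	os.Setenv("OLLAMA_TMPDIR", "/tmp/b")
	fmt.Println(TmpDir()) // /tmp/b
}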

+ 4 - 4
gpu/amd_linux.go

@@ -60,9 +60,9 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 
 	// Determine if the user has already pre-selected which GPUs to look at, then ignore the others
 	var visibleDevices []string
-	hipVD := envconfig.HipVisibleDevices   // zero based index only
-	rocrVD := envconfig.RocrVisibleDevices // zero based index or UUID, but consumer cards seem to not support UUID
-	gpuDO := envconfig.GpuDeviceOrdinal    // zero based index
+	hipVD := envconfig.HipVisibleDevices()   // zero based index only
+	rocrVD := envconfig.RocrVisibleDevices() // zero based index or UUID, but consumer cards seem to not support UUID
+	gpuDO := envconfig.GpuDeviceOrdinal()    // zero based index
 	switch {
 	// TODO is this priority order right?
 	case hipVD != "":
@@ -75,7 +75,7 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 		visibleDevices = strings.Split(gpuDO, ",")
 	}
 
-	gfxOverride := envconfig.HsaOverrideGfxVersion
+	gfxOverride := envconfig.HsaOverrideGfxVersion()
 	var supported []string
 	libDir := ""
 

+ 1 - 1
gpu/amd_windows.go

@@ -53,7 +53,7 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 	}
 
 	var supported []string
-	gfxOverride := envconfig.HsaOverrideGfxVersion
+	gfxOverride := envconfig.HsaOverrideGfxVersion()
 	if gfxOverride == "" {
 		supported, err = GetSupportedGFX(libDir)
 		if err != nil {

+ 3 - 3
gpu/assets.go

@@ -26,7 +26,7 @@ func PayloadsDir() (string, error) {
 	defer lock.Unlock()
 	var err error
 	if payloadsDir == "" {
-		runnersDir := envconfig.RunnersDir
+		runnersDir := envconfig.RunnersDir()
 
 		if runnersDir != "" {
 			payloadsDir = runnersDir
@@ -35,7 +35,7 @@ func PayloadsDir() (string, error) {
 
 		// The remainder only applies on non-windows where we still carry payloads in the main executable
 		cleanupTmpDirs()
-		tmpDir := envconfig.TmpDir
+		tmpDir := envconfig.TmpDir()
 		if tmpDir == "" {
 			tmpDir, err = os.MkdirTemp("", "ollama")
 			if err != nil {
@@ -105,7 +105,7 @@ func cleanupTmpDirs() {
 func Cleanup() {
 	lock.Lock()
 	defer lock.Unlock()
-	runnersDir := envconfig.RunnersDir
+	runnersDir := envconfig.RunnersDir()
 	if payloadsDir != "" && runnersDir == "" && runtime.GOOS != "windows" {
 		// We want to fully clean up the tmpdir parent of the payloads dir
 		tmpDir := filepath.Clean(filepath.Join(payloadsDir, ".."))
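
As context for the empty-OLLAMA_TMPDIR branch in PayloadsDir: os.MkdirTemp falls back to the system temp directory when its dir argument is "". A small sketch (the printed path is illustrative):

package main

import (
	"fmt"
	"os"
)

func main() {
	// dir == "" means "use os.TempDir()", which is what PayloadsDir
	// relies on when OLLAMA_TMPDIR is unset.
	d, err := os.MkdirTemp("", "ollama")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(d)
	fmt.Println(d) // e.g. /tmp/ollama2984579105
}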

+ 4 - 4
gpu/gpu.go

@@ -230,8 +230,8 @@ func GetGPUInfo() GpuInfoList {
 
 		// On windows we bundle the nvidia library one level above the runner dir
 		depPath := ""
-		if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
-			depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir), "cuda")
+		if runtime.GOOS == "windows" && envconfig.RunnersDir() != "" {
+			depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir()), "cuda")
 		}
 
 		// Load ALL libraries
@@ -306,8 +306,8 @@ func GetGPUInfo() GpuInfoList {
 			oHandles = initOneAPIHandles()
 			// On windows we bundle the oneapi library one level above the runner dir
 			depPath = ""
-			if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
-				depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir), "oneapi")
+			if runtime.GOOS == "windows" && envconfig.RunnersDir() != "" {
+				depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir()), "oneapi")
 			}
 
 			for d := range oHandles.oneapi.num_drivers {
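
The "one level above the runner dir" comments come down to filepath.Dir stripping the last path element. A hedged example of the path math (the install path is our example, not from the diff):

package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	runnersDir := `C:\Program Files\Ollama\ollama_runners` // hypothetical
	// On Windows, filepath.Dir drops "ollama_runners", so "cuda"
	// resolves to a sibling of the runners directory:
	// C:\Program Files\Ollama\cuda
	fmt.Println(filepath.Join(filepath.Dir(runnersDir), "cuda"))
}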

+ 1 - 1
llm/server.go

@@ -163,7 +163,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 	} else {
 		servers = serversForGpu(gpus[0]) // All GPUs in the list are matching Library and Variant
 	}
-	demandLib := envconfig.LLMLibrary
+	demandLib := envconfig.LLMLibrary()
 	if demandLib != "" {
 		serverPath := availableServers[demandLib]
 		if serverPath == "" {