@@ -1,11 +1,11 @@
 package envconfig
 
 import (
-	"errors"
 	"fmt"
 	"log/slog"
 	"math"
 	"net"
+	"net/url"
 	"os"
 	"path/filepath"
 	"runtime"
@@ -14,347 +14,271 @@ import (
 	"time"
 )
 
-type OllamaHost struct {
-	Scheme string
-	Host   string
-	Port   string
-}
-
-func (o OllamaHost) String() string {
-	return fmt.Sprintf("%s://%s:%s", o.Scheme, o.Host, o.Port)
-}
-
-var ErrInvalidHostPort = errors.New("invalid port specified in OLLAMA_HOST")
+// Host returns the scheme and host. Host can be configured via the OLLAMA_HOST environment variable.
+// Default is scheme "http" and host "127.0.0.1:11434"
+func Host() *url.URL {
+	defaultPort := "11434"
 
-var (
-	// Set via OLLAMA_ORIGINS in the environment
-	AllowOrigins []string
-	// Set via OLLAMA_DEBUG in the environment
-	Debug bool
-	// Experimental flash attention
-	FlashAttention bool
-	// Set via OLLAMA_HOST in the environment
-	Host *OllamaHost
-	// Set via OLLAMA_KEEP_ALIVE in the environment
-	KeepAlive time.Duration
-	// Set via OLLAMA_LLM_LIBRARY in the environment
-	LLMLibrary string
-	// Set via OLLAMA_MAX_LOADED_MODELS in the environment
-	MaxRunners int
-	// Set via OLLAMA_MAX_QUEUE in the environment
-	MaxQueuedRequests int
-	// Set via OLLAMA_MODELS in the environment
-	ModelsDir string
-	// Set via OLLAMA_NOHISTORY in the environment
-	NoHistory bool
-	// Set via OLLAMA_NOPRUNE in the environment
-	NoPrune bool
-	// Set via OLLAMA_NUM_PARALLEL in the environment
-	NumParallel int
-	// Set via OLLAMA_RUNNERS_DIR in the environment
-	RunnersDir string
-	// Set via OLLAMA_SCHED_SPREAD in the environment
-	SchedSpread bool
-	// Set via OLLAMA_TMPDIR in the environment
-	TmpDir string
-	// Set via OLLAMA_INTEL_GPU in the environment
-	IntelGpu bool
-
-	// Set via CUDA_VISIBLE_DEVICES in the environment
-	CudaVisibleDevices string
-	// Set via HIP_VISIBLE_DEVICES in the environment
-	HipVisibleDevices string
-	// Set via ROCR_VISIBLE_DEVICES in the environment
-	RocrVisibleDevices string
-	// Set via GPU_DEVICE_ORDINAL in the environment
-	GpuDeviceOrdinal string
-	// Set via HSA_OVERRIDE_GFX_VERSION in the environment
-	HsaOverrideGfxVersion string
-)
+	s := strings.TrimSpace(Var("OLLAMA_HOST"))
+	scheme, hostport, ok := strings.Cut(s, "://")
+	switch {
+	case !ok:
+		scheme, hostport = "http", s
+	case scheme == "http":
+		defaultPort = "80"
+	case scheme == "https":
+		defaultPort = "443"
+	}
 
-type EnvVar struct {
-	Name        string
-	Value       any
-	Description string
-}
+	// trim trailing slashes
+	hostport = strings.TrimRight(hostport, "/")
 
-func AsMap() map[string]EnvVar {
-	ret := map[string]EnvVar{
-		"OLLAMA_DEBUG":             {"OLLAMA_DEBUG", Debug, "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
-		"OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention, "Enabled flash attention"},
-		"OLLAMA_HOST":              {"OLLAMA_HOST", Host, "IP Address for the ollama server (default 127.0.0.1:11434)"},
-		"OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive, "The duration that models stay loaded in memory (default \"5m\")"},
-		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary, "Set LLM library to bypass autodetection"},
-		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners, "Maximum number of loaded models per GPU"},
-		"OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueuedRequests, "Maximum number of queued requests"},
-		"OLLAMA_MODELS":            {"OLLAMA_MODELS", ModelsDir, "The path to the models directory"},
-		"OLLAMA_NOHISTORY":         {"OLLAMA_NOHISTORY", NoHistory, "Do not preserve readline history"},
-		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune, "Do not prune model blobs on startup"},
-		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel, "Maximum number of parallel requests"},
-		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", AllowOrigins, "A comma separated list of allowed origins"},
-		"OLLAMA_RUNNERS_DIR":       {"OLLAMA_RUNNERS_DIR", RunnersDir, "Location for runners"},
-		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread, "Always schedule model across all GPUs"},
-		"OLLAMA_TMPDIR":            {"OLLAMA_TMPDIR", TmpDir, "Location for temporary files"},
+	host, port, err := net.SplitHostPort(hostport)
+	if err != nil {
+		host, port = "127.0.0.1", defaultPort
+		if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
+			host = ip.String()
+		} else if hostport != "" {
+			host = hostport
+		}
 	}
-	if runtime.GOOS != "darwin" {
-		ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices, "Set which NVIDIA devices are visible"}
-		ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices, "Set which AMD devices are visible"}
-		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices, "Set which AMD devices are visible"}
-		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal, "Set which AMD devices are visible"}
-		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion, "Override the gfx used for all detected AMD GPUs"}
-		ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGpu, "Enable experimental Intel GPU detection"}
+
+	if n, err := strconv.ParseInt(port, 10, 32); err != nil || n > 65535 || n < 0 {
+		slog.Warn("invalid port, using default", "port", port, "default", defaultPort)
+		return &url.URL{
+			Scheme: scheme,
+			Host:   net.JoinHostPort(host, defaultPort),
+		}
 	}
-	return ret
-}
 
-func Values() map[string]string {
-	vals := make(map[string]string)
-	for k, v := range AsMap() {
-		vals[k] = fmt.Sprintf("%v", v.Value)
+	return &url.URL{
+		Scheme: scheme,
+		Host:   net.JoinHostPort(host, port),
 	}
-	return vals
 }
 
-var defaultAllowOrigins = []string{
-	"localhost",
-	"127.0.0.1",
-	"0.0.0.0",
-}
+// Origins returns a list of allowed origins. Origins can be configured via the OLLAMA_ORIGINS environment variable.
+func Origins() (origins []string) {
+	if s := Var("OLLAMA_ORIGINS"); s != "" {
+		origins = strings.Split(s, ",")
+	}
 
-// Clean quotes and spaces from the value
-func clean(key string) string {
-	return strings.Trim(os.Getenv(key), "\"' ")
-}
+	for _, origin := range []string{"localhost", "127.0.0.1", "0.0.0.0"} {
+		origins = append(origins,
+			fmt.Sprintf("http://%s", origin),
+			fmt.Sprintf("https://%s", origin),
+			fmt.Sprintf("http://%s", net.JoinHostPort(origin, "*")),
+			fmt.Sprintf("https://%s", net.JoinHostPort(origin, "*")),
+		)
+	}
 
-func init() {
-	// default values
-	NumParallel = 0 // Autoselect
-	MaxRunners = 0 // Autoselect
-	MaxQueuedRequests = 512
-	KeepAlive = 5 * time.Minute
+	origins = append(origins,
+		"app://*",
+		"file://*",
+		"tauri://*",
+	)
 
-	LoadConfig()
+	return origins
 }
 
-func LoadConfig() {
-	if debug := clean("OLLAMA_DEBUG"); debug != "" {
-		d, err := strconv.ParseBool(debug)
-		if err == nil {
-			Debug = d
-		} else {
-			Debug = true
-		}
+// Models returns the path to the models directory. Models directory can be configured via the OLLAMA_MODELS environment variable.
+// Default is $HOME/.ollama/models
+func Models() string {
+	if s := Var("OLLAMA_MODELS"); s != "" {
+		return s
 	}
 
-	if fa := clean("OLLAMA_FLASH_ATTENTION"); fa != "" {
-		d, err := strconv.ParseBool(fa)
-		if err == nil {
-			FlashAttention = d
-		}
+	home, err := os.UserHomeDir()
+	if err != nil {
+		panic(err)
 	}
 
-	RunnersDir = clean("OLLAMA_RUNNERS_DIR")
-	if runtime.GOOS == "windows" && RunnersDir == "" {
-		// On Windows we do not carry the payloads inside the main executable
-		appExe, err := os.Executable()
-		if err != nil {
-			slog.Error("failed to lookup executable path", "error", err)
-		}
-
-		cwd, err := os.Getwd()
-		if err != nil {
-			slog.Error("failed to lookup working directory", "error", err)
-		}
+	return filepath.Join(home, ".ollama", "models")
+}
 
-		var paths []string
-		for _, root := range []string{filepath.Dir(appExe), cwd} {
-			paths = append(paths,
-				root,
-				filepath.Join(root, "windows-"+runtime.GOARCH),
-				filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
-			)
+// KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable.
+// Negative values are treated as infinite. Zero is treated as no keep alive.
+// Default is 5 minutes.
+func KeepAlive() (keepAlive time.Duration) {
+	keepAlive = 5 * time.Minute
+	if s := Var("OLLAMA_KEEP_ALIVE"); s != "" {
+		if d, err := time.ParseDuration(s); err == nil {
+			keepAlive = d
+		} else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
+			keepAlive = time.Duration(n) * time.Second
 		}
+	}
 
-		// Try a few variations to improve developer experience when building from source in the local tree
-		for _, p := range paths {
-			candidate := filepath.Join(p, "ollama_runners")
-			_, err := os.Stat(candidate)
-			if err == nil {
-				RunnersDir = candidate
-				break
-			}
-		}
-		if RunnersDir == "" {
-			slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
-		}
+	if keepAlive < 0 {
+		return time.Duration(math.MaxInt64)
 	}
 
-	TmpDir = clean("OLLAMA_TMPDIR")
+	return keepAlive
+}
 
-	LLMLibrary = clean("OLLAMA_LLM_LIBRARY")
+func Bool(k string) func() bool {
+	return func() bool {
+		if s := Var(k); s != "" {
+			b, err := strconv.ParseBool(s)
+			if err != nil {
+				return true
+			}
 
-	if onp := clean("OLLAMA_NUM_PARALLEL"); onp != "" {
-		val, err := strconv.Atoi(onp)
-		if err != nil {
-			slog.Error("invalid setting, ignoring", "OLLAMA_NUM_PARALLEL", onp, "error", err)
-		} else {
-			NumParallel = val
+			return b
 		}
-	}
 
-	if nohistory := clean("OLLAMA_NOHISTORY"); nohistory != "" {
-		NoHistory = true
+		return false
 	}
+}
 
-	if spread := clean("OLLAMA_SCHED_SPREAD"); spread != "" {
-		s, err := strconv.ParseBool(spread)
-		if err == nil {
-			SchedSpread = s
-		} else {
-			SchedSpread = true
-		}
-	}
+var (
+	// Debug enabled additional debug information.
+	Debug = Bool("OLLAMA_DEBUG")
+	// FlashAttention enables the experimental flash attention feature.
+	FlashAttention = Bool("OLLAMA_FLASH_ATTENTION")
+	// NoHistory disables readline history.
+	NoHistory = Bool("OLLAMA_NOHISTORY")
+	// NoPrune disables pruning of model blobs on startup.
+	NoPrune = Bool("OLLAMA_NOPRUNE")
+	// SchedSpread allows scheduling models across all GPUs.
+	SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
+	// IntelGPU enables experimental Intel GPU detection.
+	IntelGPU = Bool("OLLAMA_INTEL_GPU")
+)
 
-	if noprune := clean("OLLAMA_NOPRUNE"); noprune != "" {
-		NoPrune = true
+func String(s string) func() string {
+	return func() string {
+		return Var(s)
 	}
+}
 
-	if origins := clean("OLLAMA_ORIGINS"); origins != "" {
-		AllowOrigins = strings.Split(origins, ",")
-	}
-	for _, allowOrigin := range defaultAllowOrigins {
-		AllowOrigins = append(AllowOrigins,
-			fmt.Sprintf("http://%s", allowOrigin),
-			fmt.Sprintf("https://%s", allowOrigin),
-			fmt.Sprintf("http://%s", net.JoinHostPort(allowOrigin, "*")),
-			fmt.Sprintf("https://%s", net.JoinHostPort(allowOrigin, "*")),
-		)
-	}
+var (
+	LLMLibrary = String("OLLAMA_LLM_LIBRARY")
+	TmpDir     = String("OLLAMA_TMPDIR")
+
+	CudaVisibleDevices    = String("CUDA_VISIBLE_DEVICES")
+	HipVisibleDevices     = String("HIP_VISIBLE_DEVICES")
+	RocrVisibleDevices    = String("ROCR_VISIBLE_DEVICES")
+	GpuDeviceOrdinal      = String("GPU_DEVICE_ORDINAL")
+	HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
+)
 
-	AllowOrigins = append(AllowOrigins,
-		"app://*",
-		"file://*",
-		"tauri://*",
-	)
+func RunnersDir() (p string) {
+	if p := Var("OLLAMA_RUNNERS_DIR"); p != "" {
+		return p
+	}
 
-	maxRunners := clean("OLLAMA_MAX_LOADED_MODELS")
-	if maxRunners != "" {
-		m, err := strconv.Atoi(maxRunners)
-		if err != nil {
-			slog.Error("invalid setting, ignoring", "OLLAMA_MAX_LOADED_MODELS", maxRunners, "error", err)
-		} else {
-			MaxRunners = m
-		}
+	if runtime.GOOS != "windows" {
+		return
 	}
 
-	if onp := os.Getenv("OLLAMA_MAX_QUEUE"); onp != "" {
-		p, err := strconv.Atoi(onp)
-		if err != nil || p <= 0 {
-			slog.Error("invalid setting, ignoring", "OLLAMA_MAX_QUEUE", onp, "error", err)
-		} else {
-			MaxQueuedRequests = p
+	defer func() {
+		if p == "" {
+			slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
 		}
-	}
+	}()
 
-	ka := clean("OLLAMA_KEEP_ALIVE")
-	if ka != "" {
-		loadKeepAlive(ka)
+	// On Windows we do not carry the payloads inside the main executable
+	exe, err := os.Executable()
+	if err != nil {
+		return
 	}
 
-	var err error
-	ModelsDir, err = getModelsDir()
+	cwd, err := os.Getwd()
 	if err != nil {
-		slog.Error("invalid setting", "OLLAMA_MODELS", ModelsDir, "error", err)
+		return
 	}
 
-	Host, err = getOllamaHost()
-	if err != nil {
-		slog.Error("invalid setting", "OLLAMA_HOST", Host, "error", err, "using default port", Host.Port)
+	var paths []string
+	for _, root := range []string{filepath.Dir(exe), cwd} {
+		paths = append(paths,
+			root,
+			filepath.Join(root, "windows-"+runtime.GOARCH),
+			filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
+		)
 	}
 
-	if set, err := strconv.ParseBool(clean("OLLAMA_INTEL_GPU")); err == nil {
-		IntelGpu = set
+	// Try a few variations to improve developer experience when building from source in the local tree
+	for _, path := range paths {
+		candidate := filepath.Join(path, "ollama_runners")
+		if _, err := os.Stat(candidate); err == nil {
+			p = candidate
+			break
+		}
 	}
 
-	CudaVisibleDevices = clean("CUDA_VISIBLE_DEVICES")
-	HipVisibleDevices = clean("HIP_VISIBLE_DEVICES")
-	RocrVisibleDevices = clean("ROCR_VISIBLE_DEVICES")
-	GpuDeviceOrdinal = clean("GPU_DEVICE_ORDINAL")
-	HsaOverrideGfxVersion = clean("HSA_OVERRIDE_GFX_VERSION")
+	return p
 }
 
-func getModelsDir() (string, error) {
-	if models, exists := os.LookupEnv("OLLAMA_MODELS"); exists {
-		return models, nil
-	}
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return "", err
+func Uint(key string, defaultValue uint) func() uint {
+	return func() uint {
+		if s := Var(key); s != "" {
+			if n, err := strconv.ParseUint(s, 10, 64); err != nil {
+				slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
+			} else {
+				return uint(n)
+			}
+		}
+
+		return defaultValue
 	}
-	return filepath.Join(home, ".ollama", "models"), nil
 }
 
-func getOllamaHost() (*OllamaHost, error) {
-	defaultPort := "11434"
+var (
+	// NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
+	NumParallel = Uint("OLLAMA_NUM_PARALLEL", 0)
+	// MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
+	MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", 0)
+	// MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
+	MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
+	// MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable.
+	MaxVRAM = Uint("OLLAMA_MAX_VRAM", 0)
+)
 
-	hostVar := os.Getenv("OLLAMA_HOST")
-	hostVar = strings.TrimSpace(strings.Trim(strings.TrimSpace(hostVar), "\"'"))
+type EnvVar struct {
+	Name        string
+	Value       any
+	Description string
+}
 
-	scheme, hostport, ok := strings.Cut(hostVar, "://")
-	switch {
-	case !ok:
-		scheme, hostport = "http", hostVar
-	case scheme == "http":
-		defaultPort = "80"
-	case scheme == "https":
-		defaultPort = "443"
+func AsMap() map[string]EnvVar {
+	ret := map[string]EnvVar{
+		"OLLAMA_DEBUG":             {"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
+		"OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"},
+		"OLLAMA_HOST":              {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
+		"OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
+		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
+		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
+		"OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
+		"OLLAMA_MODELS":            {"OLLAMA_MODELS", Models(), "The path to the models directory"},
+		"OLLAMA_NOHISTORY":         {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
+		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
+		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
+		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
+		"OLLAMA_RUNNERS_DIR":       {"OLLAMA_RUNNERS_DIR", RunnersDir(), "Location for runners"},
+		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
+		"OLLAMA_TMPDIR":            {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"},
 	}
-
-	// trim trailing slashes
-	hostport = strings.TrimRight(hostport, "/")
-
-	host, port, err := net.SplitHostPort(hostport)
-	if err != nil {
-		host, port = "127.0.0.1", defaultPort
-		if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
-			host = ip.String()
-		} else if hostport != "" {
-			host = hostport
-		}
+	if runtime.GOOS != "darwin" {
+		ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"}
+		ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible"}
+		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible"}
+		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible"}
+		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
+		ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"}
 	}
+	return ret
+}
 
-	if portNum, err := strconv.ParseInt(port, 10, 32); err != nil || portNum > 65535 || portNum < 0 {
-		return &OllamaHost{
-			Scheme: scheme,
-			Host:   host,
-			Port:   defaultPort,
-		}, ErrInvalidHostPort
+func Values() map[string]string {
+	vals := make(map[string]string)
+	for k, v := range AsMap() {
+		vals[k] = fmt.Sprintf("%v", v.Value)
 	}
-
-	return &OllamaHost{
-		Scheme: scheme,
-		Host:   host,
-		Port:   port,
-	}, nil
+	return vals
 }
 
-func loadKeepAlive(ka string) {
-	v, err := strconv.Atoi(ka)
-	if err != nil {
-		d, err := time.ParseDuration(ka)
-		if err == nil {
-			if d < 0 {
-				KeepAlive = time.Duration(math.MaxInt64)
-			} else {
-				KeepAlive = d
-			}
-		}
-	} else {
-		d := time.Duration(v) * time.Second
-		if d < 0 {
-			KeepAlive = time.Duration(math.MaxInt64)
-		} else {
-			KeepAlive = d
-		}
-	}
+// Var returns an environment variable stripped of leading and trailing quotes or spaces
+func Var(key string) string {
+	return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
 }
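
Below is a minimal usage sketch (not part of the diff) showing how a caller might consume the refactored accessors, assuming the package keeps its import path github.com/ollama/ollama/envconfig. The point of the change is that settings are read lazily through functions and closures (Host(), Debug(), MaxQueue(), ...) instead of being copied into package globals by init()/LoadConfig().

package main

import (
	"fmt"

	// assumed import path for the package shown in the diff
	"github.com/ollama/ollama/envconfig"
)

func main() {
	// Host() always yields a *url.URL with a usable port; a missing or invalid
	// OLLAMA_HOST falls back to http://127.0.0.1:11434.
	fmt.Println("host:", envconfig.Host().String())

	// Boolean settings are closures built by Bool(...), so each call re-reads
	// the environment rather than relying on a one-time init() snapshot.
	if envconfig.Debug() {
		fmt.Println("debug output enabled")
	}

	fmt.Println("models dir:", envconfig.Models())
	fmt.Println("keep alive:", envconfig.KeepAlive())
	fmt.Println("max queued requests:", envconfig.MaxQueue())
}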