|
@@ -112,6 +112,26 @@ func KeepAlive() (keepAlive time.Duration) {
|
|
|
return keepAlive
|
|
|
}
|
|
|
|
|
|
+// LoadTimeout returns how long a model load may stall before the server gives up on it. It can be configured via the OLLAMA_LOAD_TIMEOUT environment variable.
|
|
|
+// A zero or negative setting disables the limit and is treated as infinite.
|
|
|
+// The default is 5 minutes.
|
|
|
+func LoadTimeout() (loadTimeout time.Duration) {
|
|
|
+	loadTimeout = 5 * time.Minute
|
|
|
+	if raw := Var("OLLAMA_LOAD_TIMEOUT"); raw != "" {
|
|
|
+		if parsed, err := time.ParseDuration(raw); err == nil {
|
|
|
+			loadTimeout = parsed
|
|
|
+		} else if secs, err := strconv.ParseInt(raw, 10, 64); err == nil {
|
|
|
+			// A bare integer is interpreted as a number of seconds.
|
|
|
+			loadTimeout = time.Duration(secs) * time.Second
|
|
|
+		}
|
|
|
+	}
|
|
|
+	if loadTimeout <= 0 {
|
|
|
+		// Non-positive values mean "never time out".
|
|
|
+		return time.Duration(math.MaxInt64)
|
|
|
+	}
|
|
|
+	return loadTimeout
|
|
|
+}
|
|
|
+
|
|
|
func Bool(k string) func() bool {
|
|
|
return func() bool {
|
|
|
if s := Var(k); s != "" {
|
|
@@ -245,10 +265,8 @@ func Uint64(key string, defaultValue uint64) func() uint64 {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-var (
|
|
|
- // Set aside VRAM per GPU
|
|
|
- GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)
|
|
|
-)
|
|
|
+// GpuOverhead is the amount of VRAM to set aside per GPU, configured via the OLLAMA_GPU_OVERHEAD environment variable (default 0; units not shown here — presumably bytes, confirm against callers).
|
|
|
+var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)
|
|
|
|
|
|
type EnvVar struct {
|
|
|
Name string
|
|
@@ -264,6 +282,7 @@ func AsMap() map[string]EnvVar {
|
|
|
"OLLAMA_HOST": {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
|
|
|
"OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
|
|
|
"OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
|
|
|
+ "OLLAMA_LOAD_TIMEOUT": {"OLLAMA_LOAD_TIMEOUT", LoadTimeout(), "How long to allow model loads to stall before giving up (default \"5m\")"},
|
|
|
"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
|
|
|
"OLLAMA_MAX_QUEUE": {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
|
|
|
"OLLAMA_MODELS": {"OLLAMA_MODELS", Models(), "The path to the models directory"},
|