  1. package envconfig
  2. import (
  3. "fmt"
  4. "log/slog"
  5. "math"
  6. "net"
  7. "net/url"
  8. "os"
  9. "path/filepath"
  10. "runtime"
  11. "strconv"
  12. "strings"
  13. "time"
  14. )
  15. // Host returns the scheme and host. Host can be configured via the OLLAMA_HOST environment variable.
  16. // Default is scheme "http" and host "127.0.0.1:11434"
  17. func Host() *url.URL {
  18. defaultPort := "11434"
  19. s := strings.TrimSpace(Var("OLLAMA_HOST"))
  20. scheme, hostport, ok := strings.Cut(s, "://")
  21. switch {
  22. case !ok:
  23. scheme, hostport = "http", s
  24. case scheme == "http":
  25. defaultPort = "80"
  26. case scheme == "https":
  27. defaultPort = "443"
  28. }
  29. hostport, path, _ := strings.Cut(hostport, "/")
  30. host, port, err := net.SplitHostPort(hostport)
  31. if err != nil {
  32. host, port = "127.0.0.1", defaultPort
  33. if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
  34. host = ip.String()
  35. } else if hostport != "" {
  36. host = hostport
  37. }
  38. }
  39. if n, err := strconv.ParseInt(port, 10, 32); err != nil || n > 65535 || n < 0 {
  40. slog.Warn("invalid port, using default", "port", port, "default", defaultPort)
  41. port = defaultPort
  42. }
  43. return &url.URL{
  44. Scheme: scheme,
  45. Host: net.JoinHostPort(host, port),
  46. Path: path,
  47. }
  48. }
  49. // AllowedOrigins returns a list of allowed origins. AllowedOrigins can be configured via the OLLAMA_ORIGINS environment variable.
  50. func AllowedOrigins() (origins []string) {
  51. if s := Var("OLLAMA_ORIGINS"); s != "" {
  52. origins = strings.Split(s, ",")
  53. }
  54. for _, origin := range []string{"localhost", "127.0.0.1", "0.0.0.0"} {
  55. origins = append(origins,
  56. fmt.Sprintf("http://%s", origin),
  57. fmt.Sprintf("https://%s", origin),
  58. fmt.Sprintf("http://%s", net.JoinHostPort(origin, "*")),
  59. fmt.Sprintf("https://%s", net.JoinHostPort(origin, "*")),
  60. )
  61. }
  62. origins = append(origins,
  63. "app://*",
  64. "file://*",
  65. "tauri://*",
  66. "vscode-webview://*",
  67. "vscode-file://*",
  68. )
  69. return origins
  70. }
  71. // Models returns the path to the models directory. Models directory can be configured via the OLLAMA_MODELS environment variable.
  72. // Default is $HOME/.ollama/models
  73. func Models() string {
  74. if s := Var("OLLAMA_MODELS"); s != "" {
  75. return s
  76. }
  77. home, err := os.UserHomeDir()
  78. if err != nil {
  79. panic(err)
  80. }
  81. return filepath.Join(home, ".ollama", "models")
  82. }
  83. // KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable.
  84. // Negative values are treated as infinite. Zero is treated as no keep alive.
  85. // Default is 5 minutes.
  86. func KeepAlive() (keepAlive time.Duration) {
  87. keepAlive = 5 * time.Minute
  88. if s := Var("OLLAMA_KEEP_ALIVE"); s != "" {
  89. if d, err := time.ParseDuration(s); err == nil {
  90. keepAlive = d
  91. } else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
  92. keepAlive = time.Duration(n) * time.Second
  93. }
  94. }
  95. if keepAlive < 0 {
  96. return time.Duration(math.MaxInt64)
  97. }
  98. return keepAlive
  99. }
  100. // LoadTimeout returns the duration for stall detection during model loads. LoadTimeout can be configured via the OLLAMA_LOAD_TIMEOUT environment variable.
  101. // Zero or Negative values are treated as infinite.
  102. // Default is 5 minutes.
  103. func LoadTimeout() (loadTimeout time.Duration) {
  104. loadTimeout = 5 * time.Minute
  105. if s := Var("OLLAMA_LOAD_TIMEOUT"); s != "" {
  106. if d, err := time.ParseDuration(s); err == nil {
  107. loadTimeout = d
  108. } else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
  109. loadTimeout = time.Duration(n) * time.Second
  110. }
  111. }
  112. if loadTimeout <= 0 {
  113. return time.Duration(math.MaxInt64)
  114. }
  115. return loadTimeout
  116. }
  117. func Bool(k string) func() bool {
  118. return func() bool {
  119. if s := Var(k); s != "" {
  120. b, err := strconv.ParseBool(s)
  121. if err != nil {
  122. return true
  123. }
  124. return b
  125. }
  126. return false
  127. }
  128. }
var (
	// Debug enables additional debug information.
	Debug = Bool("OLLAMA_DEBUG")
	// FlashAttention enables the experimental flash attention feature.
	FlashAttention = Bool("OLLAMA_FLASH_ATTENTION")
	// KvCacheType is the quantization type for the K/V cache.
	KvCacheType = String("OLLAMA_KV_CACHE_TYPE")
	// NoHistory disables readline history.
	NoHistory = Bool("OLLAMA_NOHISTORY")
	// NoPrune disables pruning of model blobs on startup.
	NoPrune = Bool("OLLAMA_NOPRUNE")
	// SchedSpread allows scheduling models across all GPUs.
	SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
	// IntelGPU enables experimental Intel GPU detection.
	IntelGPU = Bool("OLLAMA_INTEL_GPU")
	// MultiUserCache optimizes prompt caching for multi-user scenarios.
	MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE")
	// NewEngine enables the new Ollama engine.
	NewEngine = Bool("OLLAMA_NEW_ENGINE")
	// ContextLength sets the default context length.
	ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 2048)
)
  151. func String(s string) func() string {
  152. return func() string {
  153. return Var(s)
  154. }
  155. }
var (
	// LLMLibrary sets the LLM library, bypassing autodetection.
	LLMLibrary = String("OLLAMA_LLM_LIBRARY")
	// CudaVisibleDevices selects which NVIDIA devices are visible.
	CudaVisibleDevices = String("CUDA_VISIBLE_DEVICES")
	// HipVisibleDevices selects which AMD devices are visible by numeric ID.
	HipVisibleDevices = String("HIP_VISIBLE_DEVICES")
	// RocrVisibleDevices selects which AMD devices are visible by UUID or numeric ID.
	RocrVisibleDevices = String("ROCR_VISIBLE_DEVICES")
	// GpuDeviceOrdinal selects which AMD devices are visible by numeric ID.
	GpuDeviceOrdinal = String("GPU_DEVICE_ORDINAL")
	// HsaOverrideGfxVersion overrides the gfx used for all detected AMD GPUs.
	HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
)
  164. func Uint(key string, defaultValue uint) func() uint {
  165. return func() uint {
  166. if s := Var(key); s != "" {
  167. if n, err := strconv.ParseUint(s, 10, 64); err != nil {
  168. slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
  169. } else {
  170. return uint(n)
  171. }
  172. }
  173. return defaultValue
  174. }
  175. }
var (
	// NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
	NumParallel = Uint("OLLAMA_NUM_PARALLEL", 0)
	// MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
	MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", 0)
	// MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
	MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
	// MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable.
	MaxVRAM = Uint("OLLAMA_MAX_VRAM", 0)
)
  186. func Uint64(key string, defaultValue uint64) func() uint64 {
  187. return func() uint64 {
  188. if s := Var(key); s != "" {
  189. if n, err := strconv.ParseUint(s, 10, 64); err != nil {
  190. slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
  191. } else {
  192. return n
  193. }
  194. }
  195. return defaultValue
  196. }
  197. }
// GpuOverhead reserves a portion of VRAM per GPU (bytes). Configured via
// the OLLAMA_GPU_OVERHEAD environment variable; default 0.
var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)
// EnvVar describes a single configuration environment variable.
type EnvVar struct {
	Name        string // environment variable name, e.g. "OLLAMA_HOST"
	Value       any    // current effective value as read by the accessor
	Description string // human-readable description
}
// AsMap returns every known configuration environment variable keyed by
// name, with its current effective value and a human-readable description.
// Platform-specific entries are added based on runtime.GOOS.
func AsMap() map[string]EnvVar {
	ret := map[string]EnvVar{
		"OLLAMA_DEBUG":             {"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
		"OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"},
		"OLLAMA_KV_CACHE_TYPE":     {"OLLAMA_KV_CACHE_TYPE", KvCacheType(), "Quantization type for the K/V cache (default: f16)"},
		"OLLAMA_GPU_OVERHEAD":      {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
		"OLLAMA_HOST":              {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
		"OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
		"OLLAMA_LOAD_TIMEOUT":      {"OLLAMA_LOAD_TIMEOUT", LoadTimeout(), "How long to allow model loads to stall before giving up (default \"5m\")"},
		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
		"OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
		"OLLAMA_MODELS":            {"OLLAMA_MODELS", Models(), "The path to the models directory"},
		"OLLAMA_NOHISTORY":         {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
		"OLLAMA_MULTIUSER_CACHE":   {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
		"OLLAMA_CONTEXT_LENGTH":    {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 2048)"},
		"OLLAMA_NEW_ENGINE":        {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},

		// Informational
		"HTTP_PROXY":  {"HTTP_PROXY", String("HTTP_PROXY")(), "HTTP proxy"},
		"HTTPS_PROXY": {"HTTPS_PROXY", String("HTTPS_PROXY")(), "HTTPS proxy"},
		"NO_PROXY":    {"NO_PROXY", String("NO_PROXY")(), "No proxy"},
	}
	if runtime.GOOS != "windows" {
		// Windows environment variables are case-insensitive so there's no need to duplicate them
		ret["http_proxy"] = EnvVar{"http_proxy", String("http_proxy")(), "HTTP proxy"}
		ret["https_proxy"] = EnvVar{"https_proxy", String("https_proxy")(), "HTTPS proxy"}
		ret["no_proxy"] = EnvVar{"no_proxy", String("no_proxy")(), "No proxy"}
	}
	if runtime.GOOS != "darwin" {
		// GPU selection variables only apply off macOS.
		ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"}
		ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible by numeric ID"}
		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible by UUID or numeric ID"}
		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible by numeric ID"}
		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
		ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"}
	}
	return ret
}
  247. func Values() map[string]string {
  248. vals := make(map[string]string)
  249. for k, v := range AsMap() {
  250. vals[k] = fmt.Sprintf("%v", v.Value)
  251. }
  252. return vals
  253. }
  254. // Var returns an environment variable stripped of leading and trailing quotes or spaces
  255. func Var(key string) string {
  256. return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
  257. }