// config.go
  1. package envconfig
  2. import (
  3. "fmt"
  4. "log/slog"
  5. "math"
  6. "net"
  7. "net/url"
  8. "os"
  9. "path/filepath"
  10. "runtime"
  11. "strconv"
  12. "strings"
  13. "time"
  14. )
  15. // Host returns the scheme and host. Host can be configured via the OLLAMA_HOST environment variable.
  16. // Default is scheme "http" and host "127.0.0.1:11434"
  17. func Host() *url.URL {
  18. defaultPort := "11434"
  19. s := strings.TrimSpace(Var("OLLAMA_HOST"))
  20. scheme, hostport, ok := strings.Cut(s, "://")
  21. switch {
  22. case !ok:
  23. scheme, hostport = "http", s
  24. case scheme == "http":
  25. defaultPort = "80"
  26. case scheme == "https":
  27. defaultPort = "443"
  28. }
  29. hostport, path, _ := strings.Cut(hostport, "/")
  30. host, port, err := net.SplitHostPort(hostport)
  31. if err != nil {
  32. host, port = "127.0.0.1", defaultPort
  33. if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
  34. host = ip.String()
  35. } else if hostport != "" {
  36. host = hostport
  37. }
  38. }
  39. if n, err := strconv.ParseInt(port, 10, 32); err != nil || n > 65535 || n < 0 {
  40. slog.Warn("invalid port, using default", "port", port, "default", defaultPort)
  41. port = defaultPort
  42. }
  43. return &url.URL{
  44. Scheme: scheme,
  45. Host: net.JoinHostPort(host, port),
  46. Path: path,
  47. }
  48. }
  49. // Origins returns a list of allowed origins. Origins can be configured via the OLLAMA_ORIGINS environment variable.
  50. func Origins() (origins []string) {
  51. if s := Var("OLLAMA_ORIGINS"); s != "" {
  52. origins = strings.Split(s, ",")
  53. }
  54. for _, origin := range []string{"localhost", "127.0.0.1", "0.0.0.0"} {
  55. origins = append(origins,
  56. fmt.Sprintf("http://%s", origin),
  57. fmt.Sprintf("https://%s", origin),
  58. fmt.Sprintf("http://%s", net.JoinHostPort(origin, "*")),
  59. fmt.Sprintf("https://%s", net.JoinHostPort(origin, "*")),
  60. )
  61. }
  62. origins = append(origins,
  63. "app://*",
  64. "file://*",
  65. "tauri://*",
  66. )
  67. return origins
  68. }
  69. // Models returns the path to the models directory. Models directory can be configured via the OLLAMA_MODELS environment variable.
  70. // Default is $HOME/.ollama/models
  71. func Models() string {
  72. if s := Var("OLLAMA_MODELS"); s != "" {
  73. return s
  74. }
  75. home, err := os.UserHomeDir()
  76. if err != nil {
  77. panic(err)
  78. }
  79. return filepath.Join(home, ".ollama", "models")
  80. }
  81. // KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable.
  82. // Negative values are treated as infinite. Zero is treated as no keep alive.
  83. // Default is 5 minutes.
  84. func KeepAlive() (keepAlive time.Duration) {
  85. keepAlive = 5 * time.Minute
  86. if s := Var("OLLAMA_KEEP_ALIVE"); s != "" {
  87. if d, err := time.ParseDuration(s); err == nil {
  88. keepAlive = d
  89. } else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
  90. keepAlive = time.Duration(n) * time.Second
  91. }
  92. }
  93. if keepAlive < 0 {
  94. return time.Duration(math.MaxInt64)
  95. }
  96. return keepAlive
  97. }
  98. // LoadTimeout returns the duration for stall detection during model loads. LoadTimeout can be configured via the OLLAMA_LOAD_TIMEOUT environment variable.
  99. // Zero or Negative values are treated as infinite.
  100. // Default is 5 minutes.
  101. func LoadTimeout() (loadTimeout time.Duration) {
  102. loadTimeout = 5 * time.Minute
  103. if s := Var("OLLAMA_LOAD_TIMEOUT"); s != "" {
  104. if d, err := time.ParseDuration(s); err == nil {
  105. loadTimeout = d
  106. } else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
  107. loadTimeout = time.Duration(n) * time.Second
  108. }
  109. }
  110. if loadTimeout <= 0 {
  111. return time.Duration(math.MaxInt64)
  112. }
  113. return loadTimeout
  114. }
  115. func Bool(k string) func() bool {
  116. return func() bool {
  117. if s := Var(k); s != "" {
  118. b, err := strconv.ParseBool(s)
  119. if err != nil {
  120. return true
  121. }
  122. return b
  123. }
  124. return false
  125. }
  126. }
var (
	// Debug enables additional debug information.
	Debug = Bool("OLLAMA_DEBUG")
	// FlashAttention enables the experimental flash attention feature.
	FlashAttention = Bool("OLLAMA_FLASH_ATTENTION")
	// NoHistory disables readline history.
	NoHistory = Bool("OLLAMA_NOHISTORY")
	// NoPrune disables pruning of model blobs on startup.
	NoPrune = Bool("OLLAMA_NOPRUNE")
	// SchedSpread allows scheduling models across all GPUs.
	SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
	// IntelGPU enables experimental Intel GPU detection.
	IntelGPU = Bool("OLLAMA_INTEL_GPU")
	// MultiUserCache optimizes prompt caching for multi-user scenarios.
	MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE")
)
  143. func String(s string) func() string {
  144. return func() string {
  145. return Var(s)
  146. }
  147. }
var (
	// LLMLibrary bypasses autodetection of the LLM library.
	LLMLibrary = String("OLLAMA_LLM_LIBRARY")
	// TmpDir is the location for temporary files.
	TmpDir = String("OLLAMA_TMPDIR")
	// CudaVisibleDevices sets which NVIDIA devices are visible.
	CudaVisibleDevices = String("CUDA_VISIBLE_DEVICES")
	// HipVisibleDevices sets which AMD devices are visible.
	HipVisibleDevices = String("HIP_VISIBLE_DEVICES")
	// RocrVisibleDevices sets which AMD devices are visible.
	RocrVisibleDevices = String("ROCR_VISIBLE_DEVICES")
	// GpuDeviceOrdinal sets which AMD devices are visible.
	GpuDeviceOrdinal = String("GPU_DEVICE_ORDINAL")
	// HsaOverrideGfxVersion overrides the gfx used for all detected AMD GPUs.
	HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
)
  157. func Uint(key string, defaultValue uint) func() uint {
  158. return func() uint {
  159. if s := Var(key); s != "" {
  160. if n, err := strconv.ParseUint(s, 10, 64); err != nil {
  161. slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
  162. } else {
  163. return uint(n)
  164. }
  165. }
  166. return defaultValue
  167. }
  168. }
var (
	// NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
	// Default is 0.
	NumParallel = Uint("OLLAMA_NUM_PARALLEL", 0)
	// MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
	// Default is 0.
	MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", 0)
	// MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
	// Default is 512.
	MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
	// MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable.
	// Default is 0.
	MaxVRAM = Uint("OLLAMA_MAX_VRAM", 0)
)
  179. func Uint64(key string, defaultValue uint64) func() uint64 {
  180. return func() uint64 {
  181. if s := Var(key); s != "" {
  182. if n, err := strconv.ParseUint(s, 10, 64); err != nil {
  183. slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
  184. } else {
  185. return n
  186. }
  187. }
  188. return defaultValue
  189. }
  190. }
// GpuOverhead reserves a portion of VRAM per GPU, in bytes. It can be
// configured via the OLLAMA_GPU_OVERHEAD environment variable. Default is 0.
var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)
// EnvVar describes a single configuration environment variable.
type EnvVar struct {
	Name        string // environment variable name, e.g. "OLLAMA_HOST"
	Value       any    // current resolved value
	Description string // human-readable description for help output
}
// AsMap returns every supported configuration variable keyed by name, with
// its current resolved value and a description. Platform-specific entries
// are added based on runtime.GOOS.
func AsMap() map[string]EnvVar {
	ret := map[string]EnvVar{
		"OLLAMA_DEBUG":             {"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
		"OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"},
		"OLLAMA_GPU_OVERHEAD":      {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
		"OLLAMA_HOST":              {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
		"OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
		"OLLAMA_LOAD_TIMEOUT":      {"OLLAMA_LOAD_TIMEOUT", LoadTimeout(), "How long to allow model loads to stall before giving up (default \"5m\")"},
		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
		"OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
		"OLLAMA_MODELS":            {"OLLAMA_MODELS", Models(), "The path to the models directory"},
		"OLLAMA_NOHISTORY":         {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
		"OLLAMA_TMPDIR":            {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"},
		"OLLAMA_MULTIUSER_CACHE":   {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},

		// Informational
		"HTTP_PROXY":  {"HTTP_PROXY", String("HTTP_PROXY")(), "HTTP proxy"},
		"HTTPS_PROXY": {"HTTPS_PROXY", String("HTTPS_PROXY")(), "HTTPS proxy"},
		"NO_PROXY":    {"NO_PROXY", String("NO_PROXY")(), "No proxy"},
	}
	if runtime.GOOS != "windows" {
		// Windows environment variables are case-insensitive so there's no need to duplicate them
		ret["http_proxy"] = EnvVar{"http_proxy", String("http_proxy")(), "HTTP proxy"}
		ret["https_proxy"] = EnvVar{"https_proxy", String("https_proxy")(), "HTTPS proxy"}
		ret["no_proxy"] = EnvVar{"no_proxy", String("no_proxy")(), "No proxy"}
	}
	if runtime.GOOS != "darwin" {
		// GPU selection variables only apply off macOS.
		ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"}
		ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible"}
		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible"}
		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible"}
		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
		ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"}
	}
	return ret
}
  238. func Values() map[string]string {
  239. vals := make(map[string]string)
  240. for k, v := range AsMap() {
  241. vals[k] = fmt.Sprintf("%v", v.Value)
  242. }
  243. return vals
  244. }
  245. // Var returns an environment variable stripped of leading and trailing quotes or spaces
  246. func Var(key string) string {
  247. return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
  248. }
  249. // On windows, we keep the binary at the top directory, but
  250. // other platforms use a "bin" directory, so this returns ".."
  251. func LibRelativeToExe() string {
  252. if runtime.GOOS == "windows" {
  253. return "."
  254. }
  255. return ".."
  256. }