// config.go — environment-variable driven configuration for the ollama server.
  1. package envconfig
  2. import (
  3. "fmt"
  4. "log/slog"
  5. "math"
  6. "net"
  7. "net/url"
  8. "os"
  9. "path/filepath"
  10. "runtime"
  11. "strconv"
  12. "strings"
  13. "time"
  14. )
  15. // Host returns the scheme and host. Host can be configured via the OLLAMA_HOST environment variable.
  16. // Default is scheme "http" and host "127.0.0.1:11434"
  17. func Host() *url.URL {
  18. defaultPort := "11434"
  19. s := strings.TrimSpace(Var("OLLAMA_HOST"))
  20. scheme, hostport, ok := strings.Cut(s, "://")
  21. switch {
  22. case !ok:
  23. scheme, hostport = "http", s
  24. case scheme == "http":
  25. defaultPort = "80"
  26. case scheme == "https":
  27. defaultPort = "443"
  28. }
  29. hostport, path, _ := strings.Cut(hostport, "/")
  30. host, port, err := net.SplitHostPort(hostport)
  31. if err != nil {
  32. host, port = "127.0.0.1", defaultPort
  33. if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
  34. host = ip.String()
  35. } else if hostport != "" {
  36. host = hostport
  37. }
  38. }
  39. if n, err := strconv.ParseInt(port, 10, 32); err != nil || n > 65535 || n < 0 {
  40. slog.Warn("invalid port, using default", "port", port, "default", defaultPort)
  41. port = defaultPort
  42. }
  43. return &url.URL{
  44. Scheme: scheme,
  45. Host: net.JoinHostPort(host, port),
  46. Path: path,
  47. }
  48. }
  49. // Origins returns a list of allowed origins. Origins can be configured via the OLLAMA_ORIGINS environment variable.
  50. func Origins() (origins []string) {
  51. if s := Var("OLLAMA_ORIGINS"); s != "" {
  52. origins = strings.Split(s, ",")
  53. }
  54. for _, origin := range []string{"localhost", "127.0.0.1", "0.0.0.0"} {
  55. origins = append(origins,
  56. fmt.Sprintf("http://%s", origin),
  57. fmt.Sprintf("https://%s", origin),
  58. fmt.Sprintf("http://%s", net.JoinHostPort(origin, "*")),
  59. fmt.Sprintf("https://%s", net.JoinHostPort(origin, "*")),
  60. )
  61. }
  62. origins = append(origins,
  63. "app://*",
  64. "file://*",
  65. "tauri://*",
  66. )
  67. return origins
  68. }
  69. // Models returns the path to the models directory. Models directory can be configured via the OLLAMA_MODELS environment variable.
  70. // Default is $HOME/.ollama/models
  71. func Models() string {
  72. if s := Var("OLLAMA_MODELS"); s != "" {
  73. return s
  74. }
  75. home, err := os.UserHomeDir()
  76. if err != nil {
  77. panic(err)
  78. }
  79. return filepath.Join(home, ".ollama", "models")
  80. }
  81. // KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable.
  82. // Negative values are treated as infinite. Zero is treated as no keep alive.
  83. // Default is 5 minutes.
  84. func KeepAlive() (keepAlive time.Duration) {
  85. keepAlive = 5 * time.Minute
  86. if s := Var("OLLAMA_KEEP_ALIVE"); s != "" {
  87. if d, err := time.ParseDuration(s); err == nil {
  88. keepAlive = d
  89. } else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
  90. keepAlive = time.Duration(n) * time.Second
  91. }
  92. }
  93. if keepAlive < 0 {
  94. return time.Duration(math.MaxInt64)
  95. }
  96. return keepAlive
  97. }
  98. // LoadTimeout returns the duration for stall detection during model loads. LoadTimeout can be configured via the OLLAMA_LOAD_TIMEOUT environment variable.
  99. // Zero or Negative values are treated as infinite.
  100. // Default is 5 minutes.
  101. func LoadTimeout() (loadTimeout time.Duration) {
  102. loadTimeout = 5 * time.Minute
  103. if s := Var("OLLAMA_LOAD_TIMEOUT"); s != "" {
  104. if d, err := time.ParseDuration(s); err == nil {
  105. loadTimeout = d
  106. } else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
  107. loadTimeout = time.Duration(n) * time.Second
  108. }
  109. }
  110. if loadTimeout <= 0 {
  111. return time.Duration(math.MaxInt64)
  112. }
  113. return loadTimeout
  114. }
  115. func Bool(k string) func() bool {
  116. return func() bool {
  117. if s := Var(k); s != "" {
  118. b, err := strconv.ParseBool(s)
  119. if err != nil {
  120. return true
  121. }
  122. return b
  123. }
  124. return false
  125. }
  126. }
var (
	// Debug enables additional debug information.
	Debug = Bool("OLLAMA_DEBUG")
	// FlashAttention enables the experimental flash attention feature.
	FlashAttention = Bool("OLLAMA_FLASH_ATTENTION")
	// NoHistory disables readline history.
	NoHistory = Bool("OLLAMA_NOHISTORY")
	// NoPrune disables pruning of model blobs on startup.
	NoPrune = Bool("OLLAMA_NOPRUNE")
	// SchedSpread allows scheduling models across all GPUs.
	SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
	// IntelGPU enables experimental Intel GPU detection.
	IntelGPU = Bool("OLLAMA_INTEL_GPU")
)
  141. func String(s string) func() string {
  142. return func() string {
  143. return Var(s)
  144. }
  145. }
var (
	// LLMLibrary sets the LLM library, bypassing autodetection.
	LLMLibrary = String("OLLAMA_LLM_LIBRARY")
	// TmpDir sets the location for temporary files.
	TmpDir = String("OLLAMA_TMPDIR")
	// CudaVisibleDevices selects which NVIDIA devices are visible.
	CudaVisibleDevices = String("CUDA_VISIBLE_DEVICES")
	// HipVisibleDevices selects which AMD devices are visible (HIP).
	HipVisibleDevices = String("HIP_VISIBLE_DEVICES")
	// RocrVisibleDevices selects which AMD devices are visible (ROCr).
	RocrVisibleDevices = String("ROCR_VISIBLE_DEVICES")
	// GpuDeviceOrdinal selects which AMD devices are visible by ordinal.
	GpuDeviceOrdinal = String("GPU_DEVICE_ORDINAL")
	// HsaOverrideGfxVersion overrides the gfx used for all detected AMD GPUs.
	HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
)
  155. func Uint(key string, defaultValue uint) func() uint {
  156. return func() uint {
  157. if s := Var(key); s != "" {
  158. if n, err := strconv.ParseUint(s, 10, 64); err != nil {
  159. slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
  160. } else {
  161. return uint(n)
  162. }
  163. }
  164. return defaultValue
  165. }
  166. }
// Scheduler and memory limits, each configurable via its environment variable.
// A value of 0 means "unset" and lets the server pick its own default.
var (
	// NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
	NumParallel = Uint("OLLAMA_NUM_PARALLEL", 0)
	// MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
	MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", 0)
	// MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
	MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
	// MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable.
	MaxVRAM = Uint("OLLAMA_MAX_VRAM", 0)
)
  177. func Uint64(key string, defaultValue uint64) func() uint64 {
  178. return func() uint64 {
  179. if s := Var(key); s != "" {
  180. if n, err := strconv.ParseUint(s, 10, 64); err != nil {
  181. slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
  182. } else {
  183. return n
  184. }
  185. }
  186. return defaultValue
  187. }
  188. }
// GpuOverhead reserves a portion of VRAM per GPU, in bytes. GpuOverhead can
// be configured via the OLLAMA_GPU_OVERHEAD environment variable.
var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)
// EnvVar describes a single configuration environment variable: its name,
// its current effective value, and a human-readable description.
type EnvVar struct {
	Name        string // environment variable name, e.g. "OLLAMA_HOST"
	Value       any    // current effective value (type depends on the variable)
	Description string // short description of what the variable controls
}
// AsMap returns every configuration environment variable known to this
// package, keyed by name, with its current effective value and description.
// Platform-specific entries (lowercase proxy variants, GPU selection) are
// included only where they apply.
func AsMap() map[string]EnvVar {
	ret := map[string]EnvVar{
		"OLLAMA_DEBUG":             {"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
		"OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"},
		"OLLAMA_GPU_OVERHEAD":      {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
		"OLLAMA_HOST":              {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
		"OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
		"OLLAMA_LOAD_TIMEOUT":      {"OLLAMA_LOAD_TIMEOUT", LoadTimeout(), "How long to allow model loads to stall before giving up (default \"5m\")"},
		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
		"OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
		"OLLAMA_MODELS":            {"OLLAMA_MODELS", Models(), "The path to the models directory"},
		"OLLAMA_NOHISTORY":         {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
		"OLLAMA_TMPDIR":            {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"},

		// Informational
		"HTTP_PROXY":  {"HTTP_PROXY", String("HTTP_PROXY")(), "HTTP proxy"},
		"HTTPS_PROXY": {"HTTPS_PROXY", String("HTTPS_PROXY")(), "HTTPS proxy"},
		"NO_PROXY":    {"NO_PROXY", String("NO_PROXY")(), "No proxy"},
	}
	if runtime.GOOS != "windows" {
		// Windows environment variables are case-insensitive so there's no need to duplicate them
		ret["http_proxy"] = EnvVar{"http_proxy", String("http_proxy")(), "HTTP proxy"}
		ret["https_proxy"] = EnvVar{"https_proxy", String("https_proxy")(), "HTTPS proxy"}
		ret["no_proxy"] = EnvVar{"no_proxy", String("no_proxy")(), "No proxy"}
	}
	if runtime.GOOS != "darwin" {
		// GPU selection variables are not surfaced on macOS.
		ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"}
		ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible"}
		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible"}
		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible"}
		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
		ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"}
	}
	return ret
}
  235. func Values() map[string]string {
  236. vals := make(map[string]string)
  237. for k, v := range AsMap() {
  238. vals[k] = fmt.Sprintf("%v", v.Value)
  239. }
  240. return vals
  241. }
  242. // Var returns an environment variable stripped of leading and trailing quotes or spaces
  243. func Var(key string) string {
  244. return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
  245. }
  246. // On windows, we keep the binary at the top directory, but
  247. // other platforms use a "bin" directory, so this returns ".."
  248. func LibRelativeToExe() string {
  249. if runtime.GOOS == "windows" {
  250. return "."
  251. }
  252. return ".."
  253. }