config.go 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. package envconfig
  2. import (
  3. "fmt"
  4. "log/slog"
  5. "math"
  6. "net"
  7. "net/url"
  8. "os"
  9. "path/filepath"
  10. "runtime"
  11. "slices"
  12. "strconv"
  13. "strings"
  14. "time"
  15. )
  16. // Host returns the scheme and host. Host can be configured via the OLLAMA_HOST environment variable.
  17. // Default is scheme "http" and host "127.0.0.1:11434"
  18. func Host() *url.URL {
  19. defaultPort := "11434"
  20. s := strings.TrimSpace(Var("OLLAMA_HOST"))
  21. scheme, hostport, ok := strings.Cut(s, "://")
  22. switch {
  23. case !ok:
  24. scheme, hostport = "http", s
  25. case scheme == "http":
  26. defaultPort = "80"
  27. case scheme == "https":
  28. defaultPort = "443"
  29. }
  30. hostport, path, _ := strings.Cut(hostport, "/")
  31. host, port, err := net.SplitHostPort(hostport)
  32. if err != nil {
  33. host, port = "127.0.0.1", defaultPort
  34. if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
  35. host = ip.String()
  36. } else if hostport != "" {
  37. host = hostport
  38. }
  39. }
  40. if n, err := strconv.ParseInt(port, 10, 32); err != nil || n > 65535 || n < 0 {
  41. slog.Warn("invalid port, using default", "port", port, "default", defaultPort)
  42. port = defaultPort
  43. }
  44. return &url.URL{
  45. Scheme: scheme,
  46. Host: net.JoinHostPort(host, port),
  47. Path: path,
  48. }
  49. }
  50. // Origins returns a list of allowed origins. Origins can be configured via the OLLAMA_ORIGINS environment variable.
  51. func Origins() (origins []string) {
  52. if s := Var("OLLAMA_ORIGINS"); s != "" {
  53. origins = strings.Split(s, ",")
  54. }
  55. for _, origin := range []string{"localhost", "127.0.0.1", "0.0.0.0"} {
  56. origins = append(origins,
  57. fmt.Sprintf("http://%s", origin),
  58. fmt.Sprintf("https://%s", origin),
  59. fmt.Sprintf("http://%s", net.JoinHostPort(origin, "*")),
  60. fmt.Sprintf("https://%s", net.JoinHostPort(origin, "*")),
  61. )
  62. }
  63. origins = append(origins,
  64. "app://*",
  65. "file://*",
  66. "tauri://*",
  67. )
  68. return origins
  69. }
  70. // Models returns the path to the models directory. Models directory can be configured via the OLLAMA_MODELS environment variable.
  71. // Default is $HOME/.ollama/models
  72. func Models() string {
  73. if s := Var("OLLAMA_MODELS"); s != "" {
  74. return s
  75. }
  76. home, err := os.UserHomeDir()
  77. if err != nil {
  78. panic(err)
  79. }
  80. return filepath.Join(home, ".ollama", "models")
  81. }
  82. func Duration(k string, defaultValue time.Duration, zeroIsInfinite bool) func() time.Duration {
  83. return func() time.Duration {
  84. dur := defaultValue
  85. if s := Var(k); s != "" {
  86. if d, err := time.ParseDuration(s); err == nil {
  87. dur = d
  88. } else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
  89. dur = time.Duration(n) * time.Second
  90. }
  91. }
  92. if dur < 0 || (dur == 0 && zeroIsInfinite) {
  93. return time.Duration(math.MaxInt64)
  94. }
  95. return dur
  96. }
  97. }
var (
	// KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable.
	// Negative values are treated as infinite keep alive. Zero is treated as no keep alive.
	// Default is 5 minutes.
	KeepAlive = Duration("OLLAMA_KEEP_ALIVE", 5*time.Minute, false)

	// LoadTimeout returns the duration for stall detection during model loads. LoadTimeout can be configured via the OLLAMA_LOAD_TIMEOUT environment variable.
	// Negative or zero values are treated as infinite timeout.
	// Default is 5 minutes.
	LoadTimeout = Duration("OLLAMA_LOAD_TIMEOUT", 5*time.Minute, true)
)
  108. func Bool(k string) func() bool {
  109. return func() bool {
  110. if s := Var(k); s != "" {
  111. b, err := strconv.ParseBool(s)
  112. if err != nil {
  113. return true
  114. }
  115. return b
  116. }
  117. return false
  118. }
  119. }
var (
	// Debug enables additional debug information.
	Debug = Bool("OLLAMA_DEBUG")
	// FlashAttention enables the experimental flash attention feature.
	FlashAttention = Bool("OLLAMA_FLASH_ATTENTION")
	// NoHistory disables readline history.
	NoHistory = Bool("OLLAMA_NOHISTORY")
	// NoPrune disables pruning of model blobs on startup.
	NoPrune = Bool("OLLAMA_NOPRUNE")
	// SchedSpread allows scheduling models across all GPUs.
	SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
	// IntelGPU enables experimental Intel GPU detection.
	IntelGPU = Bool("OLLAMA_INTEL_GPU")
)
  134. func String(s string) func() string {
  135. return func() string {
  136. return Var(s)
  137. }
  138. }
var (
	// LLMLibrary overrides autodetection of the LLM library.
	LLMLibrary = String("OLLAMA_LLM_LIBRARY")
	// TempDir overrides the path used for temporary files.
	TempDir = String("OLLAMA_TMPDIR")

	// GPU-selection environment variables, surfaced for reporting in Vars().
	CudaVisibleDevices    = String("CUDA_VISIBLE_DEVICES")
	HipVisibleDevices     = String("HIP_VISIBLE_DEVICES")
	RocrVisibleDevices    = String("ROCR_VISIBLE_DEVICES")
	GpuDeviceOrdinal      = String("GPU_DEVICE_ORDINAL")
	HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
)
  148. func Uint[T uint | uint16 | uint32 | uint64](key string, defaultValue T) func() T {
  149. return func() T {
  150. if s := Var(key); s != "" {
  151. if n, err := strconv.ParseUint(s, 10, 64); err != nil {
  152. slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
  153. } else {
  154. return T(n)
  155. }
  156. }
  157. return defaultValue
  158. }
  159. }
var (
	// NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
	// Default is 0.
	NumParallel = Uint("OLLAMA_NUM_PARALLEL", uint(0))
	// MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
	// Default is 0.
	MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", uint(0))
	// MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
	// Default is 512.
	MaxQueue = Uint("OLLAMA_MAX_QUEUE", uint(512))
	// MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable.
	// Default is 0.
	MaxVRAM = Uint("OLLAMA_MAX_VRAM", uint(0))
	// GPUOverhead reserves a portion of VRAM per GPU. GPUOverhead can be configured via the OLLAMA_GPU_OVERHEAD environment variable.
	// Note: declared as uint64, unlike the other Uint variables above.
	GPUOverhead = Uint("OLLAMA_GPU_OVERHEAD", uint64(0))
)
  172. type desc struct {
  173. name string
  174. usage string
  175. value any
  176. defaultValue any
  177. }
  178. func (e desc) String() string {
  179. return fmt.Sprintf("%s:%v", e.name, e.value)
  180. }
// Vars returns a description of each known configuration variable together
// with its current effective value and, where meaningful, its default.
// Lowercase proxy variables are only reported off Windows; GPU-selection
// variables are only reported off macOS.
func Vars() []desc {
	s := []desc{
		{"OLLAMA_DEBUG", "Enable debug", Debug(), false},
		{"OLLAMA_FLASH_ATTENTION", "Enabled flash attention", FlashAttention(), false},
		{"OLLAMA_GPU_OVERHEAD", "Reserve a portion of VRAM per GPU", GPUOverhead(), 0},
		{"OLLAMA_HOST", "Listen address and port", Host(), "127.0.0.1:11434"},
		{"OLLAMA_KEEP_ALIVE", "Duration of inactivity before models are unloaded", KeepAlive(), 5 * time.Minute},
		{"OLLAMA_LLM_LIBRARY", "Set LLM library to bypass autodetection", LLMLibrary(), nil},
		{"OLLAMA_LOAD_TIMEOUT", "Duration for stall detection during model loads", LoadTimeout(), 5 * time.Minute},
		{"OLLAMA_MAX_LOADED_MODELS", "Maximum number of loaded models per GPU", MaxRunners(), nil},
		{"OLLAMA_MAX_QUEUE", "Maximum number of queued requests", MaxQueue(), nil},
		{"OLLAMA_MAX_VRAM", "Maximum VRAM to consider for model offloading", MaxVRAM(), nil},
		{"OLLAMA_MODELS", "Path override for models directory", Models(), nil},
		{"OLLAMA_NOHISTORY", "Disable readline history", NoHistory(), false},
		{"OLLAMA_NOPRUNE", "Disable unused blob pruning", NoPrune(), false},
		{"OLLAMA_NUM_PARALLEL", "Maximum number of parallel requests before requests are queued", NumParallel(), nil},
		{"OLLAMA_ORIGINS", "Additional HTTP Origins to allow", Origins(), nil},
		{"OLLAMA_SCHED_SPREAD", "Always schedule model across all GPUs", SchedSpread(), false},
		{"OLLAMA_TMPDIR", "Path override for temporary directory", TempDir(), nil},

		// informational
		{"HTTPS_PROXY", "Proxy for HTTPS requests", os.Getenv("HTTPS_PROXY"), nil},
		{"HTTP_PROXY", "Proxy for HTTP requests", os.Getenv("HTTP_PROXY"), nil},
		{"NO_PROXY", "No proxy for these hosts", os.Getenv("NO_PROXY"), nil},
	}

	if runtime.GOOS != "windows" {
		// Non-Windows platforms conventionally also honor the lowercase
		// proxy variable spellings.
		s = append(
			s,
			desc{"https_proxy", "Proxy for HTTPS requests", os.Getenv("https_proxy"), nil},
			desc{"http_proxy", "Proxy for HTTP requests", os.Getenv("http_proxy"), nil},
			desc{"no_proxy", "No proxy for these hosts", os.Getenv("no_proxy"), nil},
		)
	}

	if runtime.GOOS != "darwin" {
		s = append(
			s,
			desc{"CUDA_VISIBLE_DEVICES", "Set which NVIDIA devices are visible", CudaVisibleDevices(), nil},
			desc{"HIP_VISIBLE_DEVICES", "Set which AMD devices are visible", HipVisibleDevices(), nil},
			desc{"ROCR_VISIBLE_DEVICES", "Set which AMD devices are visible", RocrVisibleDevices(), nil},
			desc{"GPU_DEVICE_ORDINAL", "Set which AMD devices are visible", GpuDeviceOrdinal(), nil},
			desc{"HSA_OVERRIDE_GFX_VERSION", "Override the gfx used for all detected AMD GPUs", HsaOverrideGfxVersion(), nil},
			desc{"OLLAMA_INTEL_GPU", "Enable experimental Intel GPU detection", IntelGPU(), nil},
		)
	}

	return s
}
  226. func Describe(s ...string) map[string]string {
  227. vars := Vars()
  228. m := make(map[string]string, len(s))
  229. for _, k := range s {
  230. if i := slices.IndexFunc(vars, func(e desc) bool { return e.name == k }); i != -1 {
  231. m[k] = vars[i].usage
  232. if vars[i].defaultValue != nil {
  233. m[k] = fmt.Sprintf("%s (default: %v)", vars[i].usage, vars[i].defaultValue)
  234. }
  235. }
  236. }
  237. return m
  238. }
  239. // Var returns an environment variable stripped of leading and trailing quotes or spaces
  240. func Var(key string) string {
  241. return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
  242. }
  243. // On windows, we keep the binary at the top directory, but
  244. // other platforms use a "bin" directory, so this returns ".."
  245. func LibRelativeToExe() string {
  246. if runtime.GOOS == "windows" {
  247. return "."
  248. }
  249. return ".."
  250. }