// config.go — environment-variable driven configuration for ollama.
  1. package envconfig
  2. import (
  3. "fmt"
  4. "log/slog"
  5. "math"
  6. "net"
  7. "net/url"
  8. "os"
  9. "path/filepath"
  10. "runtime"
  11. "strconv"
  12. "strings"
  13. "time"
  14. )
  15. // Host returns the scheme and host. Host can be configured via the OLLAMA_HOST environment variable.
  16. // Default is scheme "http" and host "127.0.0.1:11434"
  17. func Host() *url.URL {
  18. defaultPort := "11434"
  19. s := strings.TrimSpace(Var("OLLAMA_HOST"))
  20. scheme, hostport, ok := strings.Cut(s, "://")
  21. switch {
  22. case !ok:
  23. scheme, hostport = "http", s
  24. case scheme == "http":
  25. defaultPort = "80"
  26. case scheme == "https":
  27. defaultPort = "443"
  28. }
  29. // trim trailing slashes
  30. hostport = strings.TrimRight(hostport, "/")
  31. host, port, err := net.SplitHostPort(hostport)
  32. if err != nil {
  33. host, port = "127.0.0.1", defaultPort
  34. if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
  35. host = ip.String()
  36. } else if hostport != "" {
  37. host = hostport
  38. }
  39. }
  40. if n, err := strconv.ParseInt(port, 10, 32); err != nil || n > 65535 || n < 0 {
  41. slog.Warn("invalid port, using default", "port", port, "default", defaultPort)
  42. return &url.URL{
  43. Scheme: scheme,
  44. Host: net.JoinHostPort(host, defaultPort),
  45. }
  46. }
  47. return &url.URL{
  48. Scheme: scheme,
  49. Host: net.JoinHostPort(host, port),
  50. }
  51. }
  52. // Origins returns a list of allowed origins. Origins can be configured via the OLLAMA_ORIGINS environment variable.
  53. func Origins() (origins []string) {
  54. if s := Var("OLLAMA_ORIGINS"); s != "" {
  55. origins = strings.Split(s, ",")
  56. }
  57. for _, origin := range []string{"localhost", "127.0.0.1", "0.0.0.0"} {
  58. origins = append(origins,
  59. fmt.Sprintf("http://%s", origin),
  60. fmt.Sprintf("https://%s", origin),
  61. fmt.Sprintf("http://%s", net.JoinHostPort(origin, "*")),
  62. fmt.Sprintf("https://%s", net.JoinHostPort(origin, "*")),
  63. )
  64. }
  65. origins = append(origins,
  66. "app://*",
  67. "file://*",
  68. "tauri://*",
  69. )
  70. return origins
  71. }
  72. // Models returns the path to the models directory. Models directory can be configured via the OLLAMA_MODELS environment variable.
  73. // Default is $HOME/.ollama/models
  74. func Models() string {
  75. if s := Var("OLLAMA_MODELS"); s != "" {
  76. return s
  77. }
  78. home, err := os.UserHomeDir()
  79. if err != nil {
  80. panic(err)
  81. }
  82. return filepath.Join(home, ".ollama", "models")
  83. }
  84. // KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable.
  85. // Negative values are treated as infinite. Zero is treated as no keep alive.
  86. // Default is 5 minutes.
  87. func KeepAlive() (keepAlive time.Duration) {
  88. keepAlive = 5 * time.Minute
  89. if s := Var("OLLAMA_KEEP_ALIVE"); s != "" {
  90. if d, err := time.ParseDuration(s); err == nil {
  91. keepAlive = d
  92. } else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
  93. keepAlive = time.Duration(n) * time.Second
  94. }
  95. }
  96. if keepAlive < 0 {
  97. return time.Duration(math.MaxInt64)
  98. }
  99. return keepAlive
  100. }
  101. func Bool(k string) func() bool {
  102. return func() bool {
  103. if s := Var(k); s != "" {
  104. b, err := strconv.ParseBool(s)
  105. if err != nil {
  106. return true
  107. }
  108. return b
  109. }
  110. return false
  111. }
  112. }
var (
	// Debug enables additional debug information.
	Debug = Bool("OLLAMA_DEBUG")
	// FlashAttention enables the experimental flash attention feature.
	FlashAttention = Bool("OLLAMA_FLASH_ATTENTION")
	// NoHistory disables readline history.
	NoHistory = Bool("OLLAMA_NOHISTORY")
	// NoPrune disables pruning of model blobs on startup.
	NoPrune = Bool("OLLAMA_NOPRUNE")
	// SchedSpread allows scheduling models across all GPUs.
	SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
	// IntelGPU enables experimental Intel GPU detection.
	IntelGPU = Bool("OLLAMA_INTEL_GPU")
)
  127. func String(s string) func() string {
  128. return func() string {
  129. return Var(s)
  130. }
  131. }
var (
	// LLMLibrary sets the LLM library to use, bypassing autodetection.
	LLMLibrary = String("OLLAMA_LLM_LIBRARY")
	// TmpDir is the location for temporary files.
	TmpDir = String("OLLAMA_TMPDIR")
	// CudaVisibleDevices selects which NVIDIA devices are visible.
	CudaVisibleDevices = String("CUDA_VISIBLE_DEVICES")
	// HipVisibleDevices selects which AMD devices are visible (HIP).
	HipVisibleDevices = String("HIP_VISIBLE_DEVICES")
	// RocrVisibleDevices selects which AMD devices are visible (ROCr).
	RocrVisibleDevices = String("ROCR_VISIBLE_DEVICES")
	// GpuDeviceOrdinal selects which AMD devices are visible.
	GpuDeviceOrdinal = String("GPU_DEVICE_ORDINAL")
	// HsaOverrideGfxVersion overrides the gfx used for all detected AMD GPUs.
	HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
)
  141. func RunnersDir() (p string) {
  142. if p := Var("OLLAMA_RUNNERS_DIR"); p != "" {
  143. return p
  144. }
  145. if runtime.GOOS != "windows" {
  146. return
  147. }
  148. defer func() {
  149. if p == "" {
  150. slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama/runners'")
  151. }
  152. }()
  153. // On Windows we do not carry the payloads inside the main executable
  154. exe, err := os.Executable()
  155. if err != nil {
  156. return
  157. }
  158. cwd, err := os.Getwd()
  159. if err != nil {
  160. return
  161. }
  162. var paths []string
  163. for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), ".."), cwd} {
  164. paths = append(paths,
  165. root,
  166. filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH),
  167. filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH),
  168. )
  169. }
  170. // Try a few variations to improve developer experience when building from source in the local tree
  171. for _, path := range paths {
  172. candidate := filepath.Join(path, "lib", "ollama", "runners")
  173. if _, err := os.Stat(candidate); err == nil {
  174. p = candidate
  175. break
  176. }
  177. }
  178. return p
  179. }
  180. func Uint(key string, defaultValue uint) func() uint {
  181. return func() uint {
  182. if s := Var(key); s != "" {
  183. if n, err := strconv.ParseUint(s, 10, 64); err != nil {
  184. slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
  185. } else {
  186. return uint(n)
  187. }
  188. }
  189. return defaultValue
  190. }
  191. }
var (
	// NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
	// Default 0 (unset).
	NumParallel = Uint("OLLAMA_NUM_PARALLEL", 0)
	// MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
	// Default 0 (unset).
	MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", 0)
	// MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
	// Default 512.
	MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
	// MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable.
	// Default 0 (no override).
	MaxVRAM = Uint("OLLAMA_MAX_VRAM", 0)
)
// EnvVar describes one configuration environment variable: its name, its
// currently-resolved value, and a human-readable description.
type EnvVar struct {
	Name string        // environment variable name, e.g. "OLLAMA_HOST"
	Value any          // resolved value (type varies per variable)
	Description string // short human-readable description for help output
}
// AsMap returns every user-facing configuration variable with its
// currently-resolved value and a short description, keyed by environment
// variable name. GPU-selection variables are omitted on macOS (darwin).
func AsMap() map[string]EnvVar {
	ret := map[string]EnvVar{
		"OLLAMA_DEBUG": {"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
		"OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"},
		"OLLAMA_HOST": {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
		"OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
		"OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
		"OLLAMA_MAX_QUEUE": {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
		"OLLAMA_MODELS": {"OLLAMA_MODELS", Models(), "The path to the models directory"},
		"OLLAMA_NOHISTORY": {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
		"OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
		"OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
		"OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
		"OLLAMA_RUNNERS_DIR": {"OLLAMA_RUNNERS_DIR", RunnersDir(), "Location for runners"},
		"OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
		"OLLAMA_TMPDIR": {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"},
	}
	// GPU-selection variables are not applicable on macOS.
	if runtime.GOOS != "darwin" {
		ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"}
		ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible"}
		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible"}
		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible"}
		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
		ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"}
	}
	return ret
}
  235. func Values() map[string]string {
  236. vals := make(map[string]string)
  237. for k, v := range AsMap() {
  238. vals[k] = fmt.Sprintf("%v", v.Value)
  239. }
  240. return vals
  241. }
  242. // Var returns an environment variable stripped of leading and trailing quotes or spaces
  243. func Var(key string) string {
  244. return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
  245. }