config.go 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. package envconfig
  2. import (
  3. "fmt"
  4. "log/slog"
  5. "net"
  6. "os"
  7. "path/filepath"
  8. "runtime"
  9. "strconv"
  10. "strings"
  11. )
// Package-level configuration settings. Each is populated from the
// corresponding OLLAMA_* environment variable by LoadConfig (invoked
// from init), with defaults seeded in init beforehand.
var (
	// AllowOrigins is the CORS allow-list; set via OLLAMA_ORIGINS in the environment
	AllowOrigins []string
	// Debug enables additional debug output; set via OLLAMA_DEBUG in the environment
	Debug bool
	// FlashAttention toggles experimental flash attention; set via OLLAMA_FLASH_ATTENTION
	FlashAttention bool
	// KeepAlive is how long models stay loaded; set via OLLAMA_KEEP_ALIVE in the environment
	KeepAlive string
	// LLMLibrary bypasses LLM library autodetection; set via OLLAMA_LLM_LIBRARY in the environment
	LLMLibrary string
	// MaxRunners caps concurrently loaded models; set via OLLAMA_MAX_LOADED_MODELS in the environment
	MaxRunners int
	// MaxQueuedRequests caps the request queue; set via OLLAMA_MAX_QUEUE in the environment
	MaxQueuedRequests int
	// MaxVRAM limits VRAM usage in bytes; set via OLLAMA_MAX_VRAM in the environment
	MaxVRAM uint64
	// NoHistory disables readline history; set via OLLAMA_NOHISTORY in the environment
	NoHistory bool
	// NoPrune disables pruning of model blobs on startup; set via OLLAMA_NOPRUNE in the environment
	NoPrune bool
	// NumParallel caps parallel requests per model; set via OLLAMA_NUM_PARALLEL in the environment
	NumParallel int
	// RunnersDir is where runner payloads live; set via OLLAMA_RUNNERS_DIR in the environment
	RunnersDir string
	// TmpDir is the location for temporary files; set via OLLAMA_TMPDIR in the environment
	TmpDir string
)
// EnvVar describes one supported configuration environment variable:
// its name, its current value, and a human-readable description
// suitable for help output.
type EnvVar struct {
	Name        string
	Value       any
	Description string
}
  45. func AsMap() map[string]EnvVar {
  46. return map[string]EnvVar{
  47. "OLLAMA_DEBUG": {"OLLAMA_DEBUG", Debug, "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
  48. "OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention, "Enabled flash attention"},
  49. "OLLAMA_HOST": {"OLLAMA_HOST", "", "IP Address for the ollama server (default 127.0.0.1:11434)"},
  50. "OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive, "The duration that models stay loaded in memory (default \"5m\")"},
  51. "OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary, "Set LLM library to bypass autodetection"},
  52. "OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners, "Maximum number of loaded models (default 1)"},
  53. "OLLAMA_MAX_QUEUE": {"OLLAMA_MAX_QUEUE", MaxQueuedRequests, "Maximum number of queued requests"},
  54. "OLLAMA_MAX_VRAM": {"OLLAMA_MAX_VRAM", MaxVRAM, "Maximum VRAM"},
  55. "OLLAMA_MODELS": {"OLLAMA_MODELS", "", "The path to the models directory"},
  56. "OLLAMA_NOHISTORY": {"OLLAMA_NOHISTORY", NoHistory, "Do not preserve readline history"},
  57. "OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune, "Do not prune model blobs on startup"},
  58. "OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel, "Maximum number of parallel requests (default 1)"},
  59. "OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", AllowOrigins, "A comma separated list of allowed origins"},
  60. "OLLAMA_RUNNERS_DIR": {"OLLAMA_RUNNERS_DIR", RunnersDir, "Location for runners"},
  61. "OLLAMA_TMPDIR": {"OLLAMA_TMPDIR", TmpDir, "Location for temporary files"},
  62. }
  63. }
  64. func Values() map[string]string {
  65. vals := make(map[string]string)
  66. for k, v := range AsMap() {
  67. vals[k] = fmt.Sprintf("%v", v.Value)
  68. }
  69. return vals
  70. }
// defaultAllowOrigins are host names that are always appended to
// AllowOrigins (as http/https origins, with and without a wildcard
// port) regardless of OLLAMA_ORIGINS. See LoadConfig.
var defaultAllowOrigins = []string{
	"localhost",
	"127.0.0.1",
	"0.0.0.0",
}
  76. // Clean quotes and spaces from the value
  77. func clean(key string) string {
  78. return strings.Trim(os.Getenv(key), "\"' ")
  79. }
  80. func init() {
  81. // default values
  82. NumParallel = 1
  83. MaxRunners = 1
  84. MaxQueuedRequests = 512
  85. LoadConfig()
  86. }
  87. func LoadConfig() {
  88. if debug := clean("OLLAMA_DEBUG"); debug != "" {
  89. d, err := strconv.ParseBool(debug)
  90. if err == nil {
  91. Debug = d
  92. } else {
  93. Debug = true
  94. }
  95. }
  96. if fa := clean("OLLAMA_FLASH_ATTENTION"); fa != "" {
  97. d, err := strconv.ParseBool(fa)
  98. if err == nil {
  99. FlashAttention = d
  100. }
  101. }
  102. RunnersDir = clean("OLLAMA_RUNNERS_DIR")
  103. if runtime.GOOS == "windows" && RunnersDir == "" {
  104. // On Windows we do not carry the payloads inside the main executable
  105. appExe, err := os.Executable()
  106. if err != nil {
  107. slog.Error("failed to lookup executable path", "error", err)
  108. }
  109. cwd, err := os.Getwd()
  110. if err != nil {
  111. slog.Error("failed to lookup working directory", "error", err)
  112. }
  113. var paths []string
  114. for _, root := range []string{filepath.Dir(appExe), cwd} {
  115. paths = append(paths,
  116. root,
  117. filepath.Join(root, "windows-"+runtime.GOARCH),
  118. filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
  119. )
  120. }
  121. // Try a few variations to improve developer experience when building from source in the local tree
  122. for _, p := range paths {
  123. candidate := filepath.Join(p, "ollama_runners")
  124. _, err := os.Stat(candidate)
  125. if err == nil {
  126. RunnersDir = candidate
  127. break
  128. }
  129. }
  130. if RunnersDir == "" {
  131. slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
  132. }
  133. }
  134. TmpDir = clean("OLLAMA_TMPDIR")
  135. userLimit := clean("OLLAMA_MAX_VRAM")
  136. if userLimit != "" {
  137. avail, err := strconv.ParseUint(userLimit, 10, 64)
  138. if err != nil {
  139. slog.Error("invalid setting, ignoring", "OLLAMA_MAX_VRAM", userLimit, "error", err)
  140. } else {
  141. MaxVRAM = avail
  142. }
  143. }
  144. LLMLibrary = clean("OLLAMA_LLM_LIBRARY")
  145. if onp := clean("OLLAMA_NUM_PARALLEL"); onp != "" {
  146. val, err := strconv.Atoi(onp)
  147. if err != nil || val <= 0 {
  148. slog.Error("invalid setting must be greater than zero", "OLLAMA_NUM_PARALLEL", onp, "error", err)
  149. } else {
  150. NumParallel = val
  151. }
  152. }
  153. if nohistory := clean("OLLAMA_NOHISTORY"); nohistory != "" {
  154. NoHistory = true
  155. }
  156. if noprune := clean("OLLAMA_NOPRUNE"); noprune != "" {
  157. NoPrune = true
  158. }
  159. if origins := clean("OLLAMA_ORIGINS"); origins != "" {
  160. AllowOrigins = strings.Split(origins, ",")
  161. }
  162. for _, allowOrigin := range defaultAllowOrigins {
  163. AllowOrigins = append(AllowOrigins,
  164. fmt.Sprintf("http://%s", allowOrigin),
  165. fmt.Sprintf("https://%s", allowOrigin),
  166. fmt.Sprintf("http://%s", net.JoinHostPort(allowOrigin, "*")),
  167. fmt.Sprintf("https://%s", net.JoinHostPort(allowOrigin, "*")),
  168. )
  169. }
  170. maxRunners := clean("OLLAMA_MAX_LOADED_MODELS")
  171. if maxRunners != "" {
  172. m, err := strconv.Atoi(maxRunners)
  173. if err != nil {
  174. slog.Error("invalid setting", "OLLAMA_MAX_LOADED_MODELS", maxRunners, "error", err)
  175. } else {
  176. MaxRunners = m
  177. }
  178. }
  179. if onp := os.Getenv("OLLAMA_MAX_QUEUE"); onp != "" {
  180. p, err := strconv.Atoi(onp)
  181. if err != nil || p <= 0 {
  182. slog.Error("invalid setting", "OLLAMA_MAX_QUEUE", onp, "error", err)
  183. } else {
  184. MaxQueuedRequests = p
  185. }
  186. }
  187. KeepAlive = clean("OLLAMA_KEEP_ALIVE")
  188. }