瀏覽代碼

usage templating

simplify usage templating by leveraging cobra's annotations
Michael Yang 10 個月之前
父節點
當前提交
7359c5ea5e
共有 7 個文件被更改,包括 264 次插入和 207 次刪除
  1. 76 102
      cmd/cmd.go
  2. 87 0
      cmd/usage.gotmpl
  3. 95 99
      envconfig/config.go
  4. 1 1
      envconfig/config_test.go
  5. 2 2
      llm/memory.go
  6. 2 2
      runners/common.go
  7. 1 1
      server/routes.go

+ 76 - 102
cmd/cmd.go

@@ -8,6 +8,7 @@ import (
 	"crypto/ed25519"
 	"crypto/rand"
 	"crypto/sha256"
+	_ "embed"
 	"encoding/pem"
 	"errors"
 	"fmt"
@@ -47,6 +48,9 @@ import (
 	"github.com/ollama/ollama/version"
 )
 
+//go:embed usage.gotmpl
+var usageTemplate string
+
 func CreateHandler(cmd *cobra.Command, args []string) error {
 	filename, _ := cmd.Flags().GetString("file")
 	filename, err := filepath.Abs(filename)
@@ -1254,21 +1258,6 @@ func versionHandler(cmd *cobra.Command, _ []string) {
 	}
 }
 
-func appendEnvDocs(cmd *cobra.Command, envs []envconfig.EnvVar) {
-	if len(envs) == 0 {
-		return
-	}
-
-	envUsage := `
-Environment Variables:
-`
-	for _, e := range envs {
-		envUsage += fmt.Sprintf("      %-24s   %s\n", e.Name, e.Description)
-	}
-
-	cmd.SetUsageTemplate(cmd.UsageTemplate() + envUsage)
-}
-
 func NewCLI() *cobra.Command {
 	log.SetFlags(log.LstdFlags | log.Lshortfile)
 	cobra.EnableCommandSorting = false
@@ -1298,22 +1287,24 @@ func NewCLI() *cobra.Command {
 	rootCmd.Flags().BoolP("version", "v", false, "Show version information")
 
 	createCmd := &cobra.Command{
-		Use:     "create MODEL",
-		Short:   "Create a model from a Modelfile",
-		Args:    cobra.ExactArgs(1),
-		PreRunE: checkServerHeartbeat,
-		RunE:    CreateHandler,
+		Use:         "create MODEL",
+		Short:       "Create a model from a Modelfile",
+		Args:        cobra.ExactArgs(1),
+		PreRunE:     checkServerHeartbeat,
+		RunE:        CreateHandler,
+		Annotations: envconfig.Describe("OLLAMA_HOST"),
 	}
 
 	createCmd.Flags().StringP("file", "f", "Modelfile", "Name of the Modelfile")
 	createCmd.Flags().StringP("quantize", "q", "", "Quantize model to this level (e.g. q4_0)")
 
 	showCmd := &cobra.Command{
-		Use:     "show MODEL",
-		Short:   "Show information for a model",
-		Args:    cobra.ExactArgs(1),
-		PreRunE: checkServerHeartbeat,
-		RunE:    ShowHandler,
+		Use:         "show MODEL",
+		Short:       "Show information for a model",
+		Args:        cobra.ExactArgs(1),
+		PreRunE:     checkServerHeartbeat,
+		RunE:        ShowHandler,
+		Annotations: envconfig.Describe("OLLAMA_HOST"),
 	}
 
 	showCmd.Flags().Bool("license", false, "Show license of a model")
@@ -1323,11 +1314,12 @@ func NewCLI() *cobra.Command {
 	showCmd.Flags().Bool("system", false, "Show system message of a model")
 
 	runCmd := &cobra.Command{
-		Use:     "run MODEL [PROMPT]",
-		Short:   "Run a model",
-		Args:    cobra.MinimumNArgs(1),
-		PreRunE: checkServerHeartbeat,
-		RunE:    RunHandler,
+		Use:         "run MODEL [PROMPT]",
+		Short:       "Run a model",
+		Args:        cobra.MinimumNArgs(1),
+		PreRunE:     checkServerHeartbeat,
+		RunE:        RunHandler,
+		Annotations: envconfig.Describe("OLLAMA_HOST", "OLLAMA_NOHISTORY"),
 	}
 
 	runCmd.Flags().String("keepalive", "", "Duration to keep a model loaded (e.g. 5m)")
@@ -1350,100 +1342,80 @@ func NewCLI() *cobra.Command {
 		Short:   "Start ollama",
 		Args:    cobra.ExactArgs(0),
 		RunE:    RunServer,
+		Annotations: envconfig.Describe(
+			"OLLAMA_DEBUG",
+			"OLLAMA_HOST",
+			"OLLAMA_KEEP_ALIVE",
+			"OLLAMA_MAX_LOADED_MODELS",
+			"OLLAMA_MAX_QUEUE",
+			"OLLAMA_MODELS",
+			"OLLAMA_NUM_PARALLEL",
+			"OLLAMA_NOPRUNE",
+			"OLLAMA_ORIGINS",
+			"OLLAMA_SCHED_SPREAD",
+			"OLLAMA_TMPDIR",
+			"OLLAMA_FLASH_ATTENTION",
+			"OLLAMA_LLM_LIBRARY",
+			"OLLAMA_GPU_OVERHEAD",
+			"OLLAMA_LOAD_TIMEOUT",
+		),
 	}
 
 	pullCmd := &cobra.Command{
-		Use:     "pull MODEL",
-		Short:   "Pull a model from a registry",
-		Args:    cobra.ExactArgs(1),
-		PreRunE: checkServerHeartbeat,
-		RunE:    PullHandler,
+		Use:         "pull MODEL",
+		Short:       "Pull a model from a registry",
+		Args:        cobra.ExactArgs(1),
+		PreRunE:     checkServerHeartbeat,
+		RunE:        PullHandler,
+		Annotations: envconfig.Describe("OLLAMA_HOST"),
 	}
 
 	pullCmd.Flags().Bool("insecure", false, "Use an insecure registry")
 
 	pushCmd := &cobra.Command{
-		Use:     "push MODEL",
-		Short:   "Push a model to a registry",
-		Args:    cobra.ExactArgs(1),
-		PreRunE: checkServerHeartbeat,
-		RunE:    PushHandler,
+		Use:         "push MODEL",
+		Short:       "Push a model to a registry",
+		Args:        cobra.ExactArgs(1),
+		PreRunE:     checkServerHeartbeat,
+		RunE:        PushHandler,
+		Annotations: envconfig.Describe("OLLAMA_HOST"),
 	}
 
 	pushCmd.Flags().Bool("insecure", false, "Use an insecure registry")
 
 	listCmd := &cobra.Command{
-		Use:     "list",
-		Aliases: []string{"ls"},
-		Short:   "List models",
-		PreRunE: checkServerHeartbeat,
-		RunE:    ListHandler,
+		Use:         "list",
+		Aliases:     []string{"ls"},
+		Short:       "List models",
+		PreRunE:     checkServerHeartbeat,
+		RunE:        ListHandler,
+		Annotations: envconfig.Describe("OLLAMA_HOST"),
 	}
 
 	psCmd := &cobra.Command{
-		Use:     "ps",
-		Short:   "List running models",
-		PreRunE: checkServerHeartbeat,
-		RunE:    ListRunningHandler,
+		Use:         "ps",
+		Short:       "List running models",
+		PreRunE:     checkServerHeartbeat,
+		RunE:        ListRunningHandler,
+		Annotations: envconfig.Describe("OLLAMA_HOST"),
 	}
 
 	copyCmd := &cobra.Command{
-		Use:     "cp SOURCE DESTINATION",
-		Short:   "Copy a model",
-		Args:    cobra.ExactArgs(2),
-		PreRunE: checkServerHeartbeat,
-		RunE:    CopyHandler,
+		Use:         "cp SOURCE DESTINATION",
+		Short:       "Copy a model",
+		Args:        cobra.ExactArgs(2),
+		PreRunE:     checkServerHeartbeat,
+		RunE:        CopyHandler,
+		Annotations: envconfig.Describe("OLLAMA_HOST"),
 	}
 
 	deleteCmd := &cobra.Command{
-		Use:     "rm MODEL [MODEL...]",
-		Short:   "Remove a model",
-		Args:    cobra.MinimumNArgs(1),
-		PreRunE: checkServerHeartbeat,
-		RunE:    DeleteHandler,
-	}
-
-	envVars := envconfig.AsMap()
-
-	envs := []envconfig.EnvVar{envVars["OLLAMA_HOST"]}
-
-	for _, cmd := range []*cobra.Command{
-		createCmd,
-		showCmd,
-		runCmd,
-		stopCmd,
-		pullCmd,
-		pushCmd,
-		listCmd,
-		psCmd,
-		copyCmd,
-		deleteCmd,
-		serveCmd,
-	} {
-		switch cmd {
-		case runCmd:
-			appendEnvDocs(cmd, []envconfig.EnvVar{envVars["OLLAMA_HOST"], envVars["OLLAMA_NOHISTORY"]})
-		case serveCmd:
-			appendEnvDocs(cmd, []envconfig.EnvVar{
-				envVars["OLLAMA_DEBUG"],
-				envVars["OLLAMA_HOST"],
-				envVars["OLLAMA_KEEP_ALIVE"],
-				envVars["OLLAMA_MAX_LOADED_MODELS"],
-				envVars["OLLAMA_MAX_QUEUE"],
-				envVars["OLLAMA_MODELS"],
-				envVars["OLLAMA_NUM_PARALLEL"],
-				envVars["OLLAMA_NOPRUNE"],
-				envVars["OLLAMA_ORIGINS"],
-				envVars["OLLAMA_SCHED_SPREAD"],
-				envVars["OLLAMA_TMPDIR"],
-				envVars["OLLAMA_FLASH_ATTENTION"],
-				envVars["OLLAMA_LLM_LIBRARY"],
-				envVars["OLLAMA_GPU_OVERHEAD"],
-				envVars["OLLAMA_LOAD_TIMEOUT"],
-			})
-		default:
-			appendEnvDocs(cmd, envs)
-		}
+		Use:         "rm MODEL [MODEL...]",
+		Short:       "Remove a model",
+		Args:        cobra.MinimumNArgs(1),
+		PreRunE:     checkServerHeartbeat,
+		RunE:        DeleteHandler,
+		Annotations: envconfig.Describe("OLLAMA_HOST"),
 	}
 
 	rootCmd.AddCommand(
@@ -1460,5 +1432,7 @@ func NewCLI() *cobra.Command {
 		deleteCmd,
 	)
 
+	rootCmd.SetUsageTemplate(usageTemplate)
+
 	return rootCmd
 }

+ 87 - 0
cmd/usage.gotmpl

@@ -0,0 +1,87 @@
+Usage:
+{{- if .Runnable }} {{ .UseLine }}
+{{- end }}
+{{- if .HasAvailableSubCommands }} {{ .CommandPath }} [command]
+{{- end }}
+
+{{- if gt (len .Aliases) 0}}
+
+Aliases:
+  {{ .NameAndAliases }}
+{{- end }}
+
+{{- if .HasExample }}
+
+Examples:
+{{ .Example }}
+{{- end }}
+
+{{- if .HasAvailableSubCommands }}
+{{- if eq (len .Groups) 0}}
+
+Available Commands:
+{{- range .Commands }}
+{{- if or .IsAvailableCommand (eq .Name "help") }}
+  {{ rpad .Name .NamePadding }} {{ .Short }}
+{{- end }}
+{{- end }}
+
+{{- else }}
+
+{{- range .Groups }}
+
+{{ .Title }}
+
+{{- range $.Commands }}
+{{- if and (eq .GroupID .ID) (or .IsAvailableCommand (eq .Name "help")) }}
+  {{ rpad .Name .NamePadding }} {{ .Short }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{- if not .AllChildCommandsHaveGroup }}
+
+Additional Commands:
+{{- range $.Commands }}
+{{- if and (eq .GroupID "") (or .IsAvailableCommand (eq .Name "help")) }}
+  {{ rpad .Name .NamePadding }} {{ .Short }}
+{{- end }}
+{{- end }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{- if .HasAvailableLocalFlags }}
+
+Flags:
+{{ .LocalFlags.FlagUsages | trimTrailingWhitespaces }}
+{{- end }}
+
+{{- if .HasAvailableInheritedFlags }}
+
+Global Flags:
+{{ .InheritedFlags.FlagUsages | trimTrailingWhitespaces }}
+{{- end }}
+
+{{- if .Annotations }}
+
+Environment Variables:
+{{- range $key, $value := .Annotations }}
+  {{ rpad $key 24 }} {{ $value | trimTrailingWhitespaces }}
+{{- end }}
+{{- end }}
+
+{{- if .HasHelpSubCommands }}
+
+Additional help topics:
+{{- range .Commands }}
+{{- if .IsAdditionalHelpTopicCommand }}
+  {{ rpad .CommandPath .CommandPathPadding }} {{ .Short }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{- if .HasAvailableSubCommands }}
+
+Use "{{ .CommandPath }} [command] --help" for more information about a command.
+{{- end }}

+ 95 - 99
envconfig/config.go

@@ -9,6 +9,7 @@ import (
 	"os"
 	"path/filepath"
 	"runtime"
+	"slices"
 	"strconv"
 	"strings"
 	"time"
@@ -92,46 +93,37 @@ func Models() string {
 	return filepath.Join(home, ".ollama", "models")
 }
 
-// KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable.
-// Negative values are treated as infinite. Zero is treated as no keep alive.
-// Default is 5 minutes.
-func KeepAlive() (keepAlive time.Duration) {
-	keepAlive = 5 * time.Minute
-	if s := Var("OLLAMA_KEEP_ALIVE"); s != "" {
-		if d, err := time.ParseDuration(s); err == nil {
-			keepAlive = d
-		} else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
-			keepAlive = time.Duration(n) * time.Second
+func Duration(k string, defaultValue time.Duration, zeroIsInfinite bool) func() time.Duration {
+	return func() time.Duration {
+		dur := defaultValue
+		if s := Var(k); s != "" {
+			if d, err := time.ParseDuration(s); err == nil {
+				dur = d
+			} else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
+				dur = time.Duration(n) * time.Second
+			}
 		}
-	}
-
-	if keepAlive < 0 {
-		return time.Duration(math.MaxInt64)
-	}
-
-	return keepAlive
-}
 
-// LoadTimeout returns the duration for stall detection during model loads. LoadTimeout can be configured via the OLLAMA_LOAD_TIMEOUT environment variable.
-// Zero or Negative values are treated as infinite.
-// Default is 5 minutes.
-func LoadTimeout() (loadTimeout time.Duration) {
-	loadTimeout = 5 * time.Minute
-	if s := Var("OLLAMA_LOAD_TIMEOUT"); s != "" {
-		if d, err := time.ParseDuration(s); err == nil {
-			loadTimeout = d
-		} else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
-			loadTimeout = time.Duration(n) * time.Second
+		if dur < 0 || (dur == 0 && zeroIsInfinite) {
+			return time.Duration(math.MaxInt64)
 		}
-	}
 
-	if loadTimeout <= 0 {
-		return time.Duration(math.MaxInt64)
+		return dur
 	}
-
-	return loadTimeout
 }
 
+var (
+	// KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable.
+	// Negative values are treated as infinite keep alive. Zero is treated as no keep alive.
+	// Default is 5 minutes.
+	KeepAlive = Duration("OLLAMA_KEEP_ALIVE", 5*time.Minute, false)
+
+	// LoadTimeout returns the duration for stall detection during model loads. LoadTimeout can be configured via the OLLAMA_LOAD_TIMEOUT environment variable.
+	// Negative or zero values are treated as infinite timeout.
+	// Default is 5 minutes.
+	LoadTimeout = Duration("OLLAMA_LOAD_TIMEOUT", 5*time.Minute, true)
+)
+
 func Bool(k string) func() bool {
 	return func() bool {
 		if s := Var(k); s != "" {
@@ -170,7 +162,7 @@ func String(s string) func() string {
 
 var (
 	LLMLibrary = String("OLLAMA_LLM_LIBRARY")
-	TmpDir     = String("OLLAMA_TMPDIR")
+	TempDir    = String("OLLAMA_TMPDIR")
 
 	CudaVisibleDevices    = String("CUDA_VISIBLE_DEVICES")
 	HipVisibleDevices     = String("HIP_VISIBLE_DEVICES")
@@ -179,13 +171,14 @@ var (
 	HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
 )
 
-func Uint(key string, defaultValue uint) func() uint {
-	return func() uint {
+
+func Uint[T uint | uint16 | uint32 | uint64](key string, defaultValue T) func() T {
+	return func() T {
 		if s := Var(key); s != "" {
 			if n, err := strconv.ParseUint(s, 10, 64); err != nil {
 				slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
 			} else {
-				return uint(n)
+				return T(n)
 			}
 		}
 
@@ -195,88 +188,91 @@ func Uint(key string, defaultValue uint) func() uint {
 
 var (
 	// NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
-	NumParallel = Uint("OLLAMA_NUM_PARALLEL", 0)
+	NumParallel = Uint("OLLAMA_NUM_PARALLEL", uint(0))
 	// MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
-	MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", 0)
+	MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", uint(0))
 	// MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
-	MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
+	MaxQueue = Uint("OLLAMA_MAX_QUEUE", uint(512))
 	// MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable.
-	MaxVRAM = Uint("OLLAMA_MAX_VRAM", 0)
+	MaxVRAM = Uint("OLLAMA_MAX_VRAM", uint(0))
+	// GPUOverhead reserves a portion of VRAM per GPU. GPUOverhead can be configured via the OLLAMA_GPU_OVERHEAD environment variable.
+	GPUOverhead = Uint("OLLAMA_GPU_OVERHEAD", uint64(0))
 )
 
-func Uint64(key string, defaultValue uint64) func() uint64 {
-	return func() uint64 {
-		if s := Var(key); s != "" {
-			if n, err := strconv.ParseUint(s, 10, 64); err != nil {
-				slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
-			} else {
-				return n
-			}
-		}
-
-		return defaultValue
-	}
+type desc struct {
+	name         string
+	usage        string
+	value        any
+	defaultValue any
 }
 
-// Set aside VRAM per GPU
-var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)
-
-type EnvVar struct {
-	Name        string
-	Value       any
-	Description string
+func (e desc) String() string {
+	return fmt.Sprintf("%s:%v", e.name, e.value)
 }
 
-func AsMap() map[string]EnvVar {
-	ret := map[string]EnvVar{
-		"OLLAMA_DEBUG":             {"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
-		"OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"},
-		"OLLAMA_GPU_OVERHEAD":      {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
-		"OLLAMA_HOST":              {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
-		"OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
-		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
-		"OLLAMA_LOAD_TIMEOUT":      {"OLLAMA_LOAD_TIMEOUT", LoadTimeout(), "How long to allow model loads to stall before giving up (default \"5m\")"},
-		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
-		"OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
-		"OLLAMA_MODELS":            {"OLLAMA_MODELS", Models(), "The path to the models directory"},
-		"OLLAMA_NOHISTORY":         {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
-		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
-		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
-		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
-		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
-		"OLLAMA_TMPDIR":            {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"},
-
-		// Informational
-		"HTTP_PROXY":  {"HTTP_PROXY", String("HTTP_PROXY")(), "HTTP proxy"},
-		"HTTPS_PROXY": {"HTTPS_PROXY", String("HTTPS_PROXY")(), "HTTPS proxy"},
-		"NO_PROXY":    {"NO_PROXY", String("NO_PROXY")(), "No proxy"},
+func Vars() []desc {
+	s := []desc{
+		{"OLLAMA_DEBUG", "Enable debug", Debug(), false},
+		{"OLLAMA_FLASH_ATTENTION", "Enabled flash attention", FlashAttention(), false},
+		{"OLLAMA_GPU_OVERHEAD", "Reserve a portion of VRAM per GPU", GPUOverhead(), 0},
+		{"OLLAMA_HOST", "Listen address and port", Host(), "127.0.0.1:11434"},
+		{"OLLAMA_KEEP_ALIVE", "Duration of inactivity before models are unloaded", KeepAlive(), 5 * time.Minute},
+		{"OLLAMA_LLM_LIBRARY", "Set LLM library to bypass autodetection", LLMLibrary(), nil},
+		{"OLLAMA_LOAD_TIMEOUT", "Duration for stall detection during model loads", LoadTimeout(), 5 * time.Minute},
+		{"OLLAMA_MAX_LOADED_MODELS", "Maximum number of loaded models per GPU", MaxRunners(), nil},
+		{"OLLAMA_MAX_QUEUE", "Maximum number of queued requests", MaxQueue(), nil},
+		{"OLLAMA_MAX_VRAM", "Maximum VRAM to consider for model offloading", MaxVRAM(), nil},
+		{"OLLAMA_MODELS", "Path override for models directory", Models(), nil},
+		{"OLLAMA_NOHISTORY", "Disable readline history", NoHistory(), false},
+		{"OLLAMA_NOPRUNE", "Disable unused blob pruning", NoPrune(), false},
+		{"OLLAMA_NUM_PARALLEL", "Maximum number of parallel requests before requests are queued", NumParallel(), nil},
+		{"OLLAMA_ORIGINS", "Additional HTTP Origins to allow", Origins(), nil},
+		{"OLLAMA_SCHED_SPREAD", "Always schedule model across all GPUs", SchedSpread(), false},
+		{"OLLAMA_TMPDIR", "Path override for temporary directory", TempDir(), nil},
+
+		// informational
+		{"HTTPS_PROXY", "Proxy for HTTPS requests", os.Getenv("HTTPS_PROXY"), nil},
+		{"HTTP_PROXY", "Proxy for HTTP requests", os.Getenv("HTTP_PROXY"), nil},
+		{"NO_PROXY", "No proxy for these hosts", os.Getenv("NO_PROXY"), nil},
 	}
 
 	if runtime.GOOS != "windows" {
-		// Windows environment variables are case-insensitive so there's no need to duplicate them
-		ret["http_proxy"] = EnvVar{"http_proxy", String("http_proxy")(), "HTTP proxy"}
-		ret["https_proxy"] = EnvVar{"https_proxy", String("https_proxy")(), "HTTPS proxy"}
-		ret["no_proxy"] = EnvVar{"no_proxy", String("no_proxy")(), "No proxy"}
+		s = append(
+			s,
+			desc{"https_proxy", "Proxy for HTTPS requests", os.Getenv("https_proxy"), nil},
+			desc{"http_proxy", "Proxy for HTTP requests", os.Getenv("http_proxy"), nil},
+			desc{"no_proxy", "No proxy for these hosts", os.Getenv("no_proxy"), nil},
+		)
 	}
 
 	if runtime.GOOS != "darwin" {
-		ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"}
-		ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible"}
-		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible"}
-		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible"}
-		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
-		ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"}
+		s = append(
+			s,
+			desc{"CUDA_VISIBLE_DEVICES", "Set which NVIDIA devices are visible", CudaVisibleDevices(), nil},
+			desc{"HIP_VISIBLE_DEVICES", "Set which AMD devices are visible", HipVisibleDevices(), nil},
+			desc{"ROCR_VISIBLE_DEVICES", "Set which AMD devices are visible", RocrVisibleDevices(), nil},
+			desc{"GPU_DEVICE_ORDINAL", "Set which AMD devices are visible", GpuDeviceOrdinal(), nil},
+			desc{"HSA_OVERRIDE_GFX_VERSION", "Override the gfx used for all detected AMD GPUs", HsaOverrideGfxVersion(), nil},
+			desc{"OLLAMA_INTEL_GPU", "Enable experimental Intel GPU detection", IntelGPU(), nil},
+		)
 	}
 
-	return ret
+	return s
 }
 
-func Values() map[string]string {
-	vals := make(map[string]string)
-	for k, v := range AsMap() {
-		vals[k] = fmt.Sprintf("%v", v.Value)
+func Describe(s ...string) map[string]string {
+	vars := Vars()
+	m := make(map[string]string, len(s))
+	for _, k := range s {
+		if i := slices.IndexFunc(vars, func(e desc) bool { return e.name == k }); i != -1 {
+			m[k] = vars[i].usage
+			if vars[i].defaultValue != nil {
+				m[k] = fmt.Sprintf("%s (default: %v)", vars[i].usage, vars[i].defaultValue)
+			}
+		}
 	}
-	return vals
+
+	return m
 }
 
 // Var returns an environment variable stripped of leading and trailing quotes or spaces

+ 1 - 1
envconfig/config_test.go

@@ -175,7 +175,7 @@ func TestUint(t *testing.T) {
 	for k, v := range cases {
 		t.Run(k, func(t *testing.T) {
 			t.Setenv("OLLAMA_UINT", k)
-			if i := Uint("OLLAMA_UINT", 11434)(); i != v {
+			if i := Uint("OLLAMA_UINT", uint(11434))(); i != v {
 				t.Errorf("%s: expected %d, got %d", k, v, i)
 			}
 		})

+ 2 - 2
llm/memory.go

@@ -95,7 +95,7 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
 	// Overflow that didn't fit into the GPU
 	var overflow uint64
 
-	overhead := envconfig.GpuOverhead()
+	overhead := envconfig.GPUOverhead()
 	availableList := make([]string, len(gpus))
 	for i, gpu := range gpus {
 		availableList[i] = format.HumanBytes2(gpu.FreeMemory)
@@ -322,7 +322,7 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
 }
 
 func (m MemoryEstimate) log() {
-	overhead := envconfig.GpuOverhead()
+	overhead := envconfig.GPUOverhead()
 	slog.Info(
 		"offload to "+m.inferenceLibrary,
 		slog.Group(

+ 2 - 2
runners/common.go

@@ -119,7 +119,7 @@ func hasPayloads(payloadFS fs.FS) bool {
 
 func extractRunners(payloadFS fs.FS) (string, error) {
 	cleanupTmpDirs()
-	tmpDir, err := os.MkdirTemp(envconfig.TmpDir(), "ollama")
+	tmpDir, err := os.MkdirTemp(envconfig.TempDir(), "ollama")
 	if err != nil {
 		return "", fmt.Errorf("failed to generate tmp dir: %w", err)
 	}
@@ -224,7 +224,7 @@ func extractFiles(payloadFS fs.FS, targetDir string, glob string) error {
 
 // Best effort to clean up prior tmpdirs
 func cleanupTmpDirs() {
-	tmpDir := envconfig.TmpDir()
+	tmpDir := envconfig.TempDir()
 	if tmpDir == "" {
 		tmpDir = os.TempDir()
 	}

+ 1 - 1
server/routes.go

@@ -1150,7 +1150,7 @@ func Serve(ln net.Listener) error {
 		level = slog.LevelDebug
 	}
 
-	slog.Info("server config", "env", envconfig.Values())
+	slog.Info("server config", "env", envconfig.Vars())
 	handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
 		Level:     level,
 		AddSource: true,