Browse Source

config: allow setting context length through env var (#8938)

* envconfig: allow setting context length through env var
Parth Sareen 2 tháng trước
mục cha
commit
314573bfe8
4 tập tin đã thay đổi với 23 bổ sung và 1 xóa
  1. 3 1
      api/types.go
  2. 3 0
      envconfig/config.go
  3. 16 0
      envconfig/config_test.go
  4. 1 0
      llm/memory_test.go

+ 3 - 1
api/types.go

@@ -10,6 +10,8 @@ import (
 	"strconv"
 	"strings"
 	"time"
+
+	"github.com/ollama/ollama/envconfig"
 )
 
 // StatusError is an error with an HTTP status code and message.
@@ -609,7 +611,7 @@ func DefaultOptions() Options {
 
 		Runner: Runner{
 			// options set when the model is loaded
-			NumCtx:    2048,
+			NumCtx:    int(envconfig.ContextLength()),
 			NumBatch:  512,
 			NumGPU:    -1, // -1 here indicates that NumGPU should be set dynamically
 			NumThread: 0,  // let the runtime decide

+ 3 - 0
envconfig/config.go

@@ -167,6 +167,8 @@ var (
 	MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE")
 	// Enable the new Ollama engine
 	NewEngine = Bool("OLLAMA_NEW_ENGINE")
+	// ContextLength sets the default context length
+	ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 2048)
 )
 
 func String(s string) func() string {
@@ -252,6 +254,7 @@ func AsMap() map[string]EnvVar {
 		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
 		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
 		"OLLAMA_MULTIUSER_CACHE":   {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
+		"OLLAMA_CONTEXT_LENGTH":    {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 2048)"},
 		"OLLAMA_NEW_ENGINE":        {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},
 
 		// Informational

+ 16 - 0
envconfig/config_test.go

@@ -272,3 +272,19 @@ func TestVar(t *testing.T) {
 		})
 	}
 }
+
+func TestContextLength(t *testing.T) {
+	cases := map[string]uint{
+		"":     2048,
+		"4096": 4096,
+	}
+
+	for k, v := range cases {
+		t.Run(k, func(t *testing.T) {
+			t.Setenv("OLLAMA_CONTEXT_LENGTH", k)
+			if i := ContextLength(); i != v {
+				t.Errorf("%s: expected %d, got %d", k, v, i)
+			}
+		})
+	}
+}

+ 1 - 0
llm/memory_test.go

@@ -17,6 +17,7 @@ import (
 func TestEstimateGPULayers(t *testing.T) {
 	t.Setenv("OLLAMA_DEBUG", "1")
 	t.Setenv("OLLAMA_KV_CACHE_TYPE", "") // Ensure default f16
+	t.Setenv("OLLAMA_CONTEXT_LENGTH", "2048")
 
 	modelName := "dummy"
 	f, err := os.CreateTemp(t.TempDir(), modelName)