Browse Source

config: allow setting context length through env var (#8938)

* envconfig: allow setting context length through env var
Parth Sareen 2 tháng trước
mục cha
commit
314573bfe8
4 tập tin đã thay đổi với 23 bổ sung và 1 xóa
  1. 3 1
      api/types.go
  2. 3 0
      envconfig/config.go
  3. 16 0
      envconfig/config_test.go
  4. 1 0
      llm/memory_test.go

+ 3 - 1
api/types.go

@@ -10,6 +10,8 @@ import (
 	"strconv"
 	"strings"
 	"time"
+
+	"github.com/ollama/ollama/envconfig"
 )
 
 // StatusError is an error with an HTTP status code and message.
@@ -609,7 +611,7 @@ func DefaultOptions() Options {
 
 		Runner: Runner{
 			// options set when the model is loaded
-			NumCtx:    2048,
+			NumCtx:    int(envconfig.ContextLength()),
 			NumBatch:  512,
 			NumGPU:    -1, // -1 here indicates that NumGPU should be set dynamically
 			NumThread: 0,  // let the runtime decide

+ 3 - 0
envconfig/config.go

@@ -167,6 +167,8 @@ var (
 	MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE")
 	// Enable the new Ollama engine
 	NewEngine = Bool("OLLAMA_NEW_ENGINE")
+	// ContextLength sets the default context length
+	ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 2048)
 )
 
 func String(s string) func() string {
@@ -252,6 +254,7 @@ func AsMap() map[string]EnvVar {
 		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
 		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
 		"OLLAMA_MULTIUSER_CACHE":   {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
+		"OLLAMA_CONTEXT_LENGTH":    {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 2048)"},
 		"OLLAMA_NEW_ENGINE":        {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},
 
 		// Informational

+ 16 - 0
envconfig/config_test.go

@@ -272,3 +272,19 @@ func TestVar(t *testing.T) {
 		})
 	}
 }
+
+func TestContextLength(t *testing.T) {
+	cases := map[string]uint{
+		"":     2048,
+		"4096": 4096,
+	}
+
+	for k, v := range cases {
+		t.Run(k, func(t *testing.T) {
+			t.Setenv("OLLAMA_CONTEXT_LENGTH", k)
+			if i := ContextLength(); i != v {
+				t.Errorf("%s: expected %d, got %d", k, v, i)
+			}
+		})
+	}
+}

+ 1 - 0
llm/memory_test.go

@@ -17,6 +17,7 @@ import (
 func TestEstimateGPULayers(t *testing.T) {
 	t.Setenv("OLLAMA_DEBUG", "1")
 	t.Setenv("OLLAMA_KV_CACHE_TYPE", "") // Ensure default f16
+	t.Setenv("OLLAMA_CONTEXT_LENGTH", "2048")
 
 	modelName := "dummy"
 	f, err := os.CreateTemp(t.TempDir(), modelName)