@@ -5,14 +5,16 @@ package integration
 import (
 	"context"
 	"log/slog"
 	"os"
 	"strconv"
 	"sync"
 	"testing"
 	"time"
 
-	"github.com/ollama/ollama/api"
 	"github.com/stretchr/testify/require"
+
+	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/format"
 )
 
 func TestMultiModelConcurrency(t *testing.T) {
@@ -106,13 +108,16 @@ func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
 
 // Stress the system if we know how much VRAM it has, and attempt to load more models than will fit
 func TestMultiModelStress(t *testing.T) {
-	vram := os.Getenv("OLLAMA_MAX_VRAM") // TODO - discover actual VRAM
-	if vram == "" {
+	s := os.Getenv("OLLAMA_MAX_VRAM") // TODO - discover actual VRAM
+	if s == "" {
 		t.Skip("OLLAMA_MAX_VRAM not specified, can't pick the right models for the stress test")
 	}
-	max, err := strconv.ParseUint(vram, 10, 64)
-	require.NoError(t, err)
-	const MB = uint64(1024 * 1024)
+
+	maxVram, err := strconv.ParseUint(s, 10, 64)
+	if err != nil {
+		t.Fatal(err)
+	}
+
 	type model struct {
 		name string
 		size uint64 // Approximate amount of VRAM they typically use when fully loaded in VRAM
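The `// TODO - discover actual VRAM` comment is left open by this change. On NVIDIA hardware, one plausible shape for that discovery is shelling out to `nvidia-smi`, which reports per-GPU memory totals in MiB. A minimal sketch under that assumption (the `discoverVRAM` helper is hypothetical, not part of this diff):

```go
package integration

import (
	"os/exec"
	"strconv"
	"strings"
)

// discoverVRAM (hypothetical) returns the first GPU's total memory in bytes
// by parsing nvidia-smi output; callers could fall back to OLLAMA_MAX_VRAM
// when it returns an error.
func discoverVRAM() (uint64, error) {
	out, err := exec.Command("nvidia-smi",
		"--query-gpu=memory.total", "--format=csv,noheader,nounits").Output()
	if err != nil {
		return 0, err // no NVIDIA GPU, or nvidia-smi not on PATH
	}
	// One line per GPU, value in MiB; use the first GPU.
	first := strings.TrimSpace(strings.SplitN(string(out), "\n", 2)[0])
	mib, err := strconv.ParseUint(first, 10, 64)
	if err != nil {
		return 0, err
	}
	return mib * 1024 * 1024, nil
}
```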
@@ -121,83 +126,82 @@ func TestMultiModelStress(t *testing.T) {
 	smallModels := []model{
 		{
 			name: "orca-mini",
-			size: 2992 * MB,
+			size: 2992 * format.MebiByte,
 		},
 		{
 			name: "phi",
-			size: 2616 * MB,
+			size: 2616 * format.MebiByte,
 		},
 		{
 			name: "gemma:2b",
-			size: 2364 * MB,
+			size: 2364 * format.MebiByte,
 		},
 		{
 			name: "stable-code:3b",
-			size: 2608 * MB,
+			size: 2608 * format.MebiByte,
 		},
 		{
 			name: "starcoder2:3b",
-			size: 2166 * MB,
+			size: 2166 * format.MebiByte,
 		},
 	}
 	mediumModels := []model{
 		{
 			name: "llama2",
-			size: 5118 * MB,
+			size: 5118 * format.MebiByte,
 		},
 		{
 			name: "mistral",
-			size: 4620 * MB,
+			size: 4620 * format.MebiByte,
 		},
 		{
 			name: "orca-mini:7b",
-			size: 5118 * MB,
+			size: 5118 * format.MebiByte,
 		},
 		{
 			name: "dolphin-mistral",
-			size: 4620 * MB,
+			size: 4620 * format.MebiByte,
 		},
 		{
 			name: "gemma:7b",
-			size: 5000 * MB,
+			size: 5000 * format.MebiByte,
+		},
+		{
+			name: "codellama:7b",
+			size: 5118 * format.MebiByte,
 		},
-		// TODO - uncomment this once #3565 is merged and this is rebased on it
-		// {
-		// 	name: "codellama:7b",
-		// 	size: 5118 * MB,
-		// },
 	}
 
 	// These seem to be too slow to be useful...
 	// largeModels := []model{
 	// 	{
 	// 		name: "llama2:13b",
-	// 		size: 7400 * MB,
+	// 		size: 7400 * format.MebiByte,
 	// 	},
 	// 	{
 	// 		name: "codellama:13b",
-	// 		size: 7400 * MB,
+	// 		size: 7400 * format.MebiByte,
 	// 	},
 	// 	{
 	// 		name: "orca-mini:13b",
-	// 		size: 7400 * MB,
+	// 		size: 7400 * format.MebiByte,
 	// 	},
 	// 	{
 	// 		name: "gemma:7b",
-	// 		size: 5000 * MB,
+	// 		size: 5000 * format.MebiByte,
 	// 	},
 	// 	{
 	// 		name: "starcoder2:15b",
-	// 		size: 9100 * MB,
+	// 		size: 9100 * format.MebiByte,
 	// 	},
 	// }
 
 	var chosenModels []model
 	switch {
-	case max < 10000*MB:
+	case maxVram < 10000*format.MebiByte:
 		slog.Info("selecting small models")
 		chosenModels = smallModels
-	// case max < 30000*MB:
+	// case maxVram < 30000*format.MebiByte:
 	default:
 		slog.Info("selecting medium models")
 		chosenModels = mediumModels
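For reference, the cutoff in the switch above is a byte count: `format`'s size constants are plain byte multipliers, so `10000*format.MebiByte` is roughly 9.8 GiB. A standalone check of the boundary (assuming `format.HumanBytes2` renders binary units, as in the logging in the next hunk):

```go
package main

import (
	"fmt"

	"github.com/ollama/ollama/format"
)

func main() {
	// The small/medium model cutoff from the switch above, in bytes.
	threshold := uint64(10000 * format.MebiByte)
	fmt.Println(threshold)                     // 10485760000
	fmt.Println(format.HumanBytes2(threshold)) // ~ "9.8 GiB"
}
```

Since `OLLAMA_MAX_VRAM` is parsed with `strconv.ParseUint`, it must be given as a plain decimal byte count; values below 10485760000 exercise the small-model path, anything at or above it currently selects the medium set.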
@@ -226,15 +230,15 @@ func TestMultiModelStress(t *testing.T) {
 	}
 
 	var wg sync.WaitGroup
-	consumed := uint64(256 * MB) // Assume some baseline usage
+	consumed := uint64(256 * format.MebiByte) // Assume some baseline usage
 	for i := 0; i < len(req); i++ {
-		// Always get at least 2 models, but dont' overshoot VRAM too much or we'll take too long
-		if i > 1 && consumed > max {
-			slog.Info("achieved target vram exhaustion", "count", i, "vramMB", max/1024/1024, "modelsMB", consumed/1024/1024)
+		// Always get at least 2 models, but don't overshoot VRAM too much or we'll take too long
+		if i > 1 && consumed > maxVram {
+			slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))
 			break
 		}
 		consumed += chosenModels[i].size
-		slog.Info("target vram", "count", i, "vramMB", max/1024/1024, "modelsMB", consumed/1024/1024)
+		slog.Info("target vram", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))
 
 		wg.Add(1)
 		go func(i int) {