Browse Source

Allow setting max vram for workarounds

Until we get all the memory calculations correct, this can provide
an escape valve for users to work around out-of-memory crashes.
Daniel Hiltgen 1 năm trước cách đây
mục cha
commit
be330174dd
2 tập tin đã thay đổi với 24 bổ sung và 0 xóa
  1. 9 0
      gpu/gpu.go
  2. 15 0
      gpu/gpu_darwin.go

+ 9 - 0
gpu/gpu.go

@@ -242,6 +242,15 @@ func getCPUMem() (memInfo, error) {
 }
 
 func CheckVRAM() (int64, error) {
+	userLimit := os.Getenv("OLLAMA_MAX_VRAM")
+	if userLimit != "" {
+		avail, err := strconv.ParseInt(userLimit, 10, 64)
+		if err != nil {
+			return 0, fmt.Errorf("Invalid OLLAMA_MAX_VRAM setting %s: %s", userLimit, err)
+		}
+		slog.Info(fmt.Sprintf("user override OLLAMA_MAX_VRAM=%d", avail))
+		return avail, nil
+	}
 	gpuInfo := GetGPUInfo()
 	if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
 		// leave 10% or 1024MiB of VRAM free per GPU to handle unaccounted for overhead

+ 15 - 0
gpu/gpu_darwin.go

@@ -1,6 +1,7 @@
 //go:build darwin
 
 package gpu
+
 /*
 #cgo CFLAGS: -x objective-c
 #cgo LDFLAGS: -framework Foundation -framework CoreGraphics -framework Metal
@@ -8,11 +9,25 @@ package gpu
 */
 import "C"
 import (
+	"fmt"
+	"log/slog"
+	"os"
 	"runtime"
+	"strconv"
 )
 
 // CheckVRAM returns the free VRAM in bytes on Linux machines with NVIDIA GPUs
 func CheckVRAM() (int64, error) {
+	userLimit := os.Getenv("OLLAMA_MAX_VRAM")
+	if userLimit != "" {
+		avail, err := strconv.ParseInt(userLimit, 10, 64)
+		if err != nil {
+			return 0, fmt.Errorf("Invalid OLLAMA_MAX_VRAM setting %s: %s", userLimit, err)
+		}
+		slog.Info(fmt.Sprintf("user override OLLAMA_MAX_VRAM=%d", avail))
+		return avail, nil
+	}
+
 	if runtime.GOARCH == "amd64" {
 		// gpu not supported, this may not be metal
 		return 0, nil