Quellcode durchsuchen

Merge pull request #2964 from dhiltgen/mem_limit_var

Allow setting max vram for workarounds
Daniel Hiltgen vor 1 Jahr
Ursprung
Commit
82ddc3e441
2 geänderte Dateien mit 24 neuen und 0 gelöschten Zeilen
  1. 9 0
      gpu/gpu.go
  2. 15 0
      gpu/gpu_darwin.go

+ 9 - 0
gpu/gpu.go

@@ -242,6 +242,15 @@ func getCPUMem() (memInfo, error) {
 }
 
 func CheckVRAM() (int64, error) {
+	userLimit := os.Getenv("OLLAMA_MAX_VRAM")
+	if userLimit != "" {
+		avail, err := strconv.ParseInt(userLimit, 10, 64)
+		if err != nil {
+			return 0, fmt.Errorf("Invalid OLLAMA_MAX_VRAM setting %s: %s", userLimit, err)
+		}
+		slog.Info(fmt.Sprintf("user override OLLAMA_MAX_VRAM=%d", avail))
+		return avail, nil
+	}
 	gpuInfo := GetGPUInfo()
 	if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
 		// leave 10% or 1024MiB of VRAM free per GPU to handle unaccounted for overhead

+ 15 - 0
gpu/gpu_darwin.go

@@ -1,6 +1,7 @@
 //go:build darwin
 
 package gpu
+
 /*
 #cgo CFLAGS: -x objective-c
 #cgo LDFLAGS: -framework Foundation -framework CoreGraphics -framework Metal
@@ -8,11 +9,25 @@ package gpu
 */
 import "C"
 import (
+	"fmt"
+	"log/slog"
+	"os"
 	"runtime"
+	"strconv"
 )
 
 // CheckVRAM returns the free VRAM in bytes on macOS (darwin builds), or the OLLAMA_MAX_VRAM value when that variable is set
 func CheckVRAM() (int64, error) {
+	userLimit := os.Getenv("OLLAMA_MAX_VRAM")
+	if userLimit != "" {
+		avail, err := strconv.ParseInt(userLimit, 10, 64)
+		if err != nil {
+			return 0, fmt.Errorf("Invalid OLLAMA_MAX_VRAM setting %s: %s", userLimit, err)
+		}
+		slog.Info(fmt.Sprintf("user override OLLAMA_MAX_VRAM=%d", avail))
+		return avail, nil
+	}
+
 	if runtime.GOARCH == "amd64" {
 		// gpu not supported, this may not be metal
 		return 0, nil