1 year ago · 8727a9c140
--- a/gpu/amd_hip_windows.go
+++ b/gpu/amd_hip_windows.go
@@ -3,7 +3,6 @@ package gpu
 
				 import (
			
 
				 	"fmt"
			
 
				 	"log/slog"
			
 
				-	"strconv"
			
 
				 	"syscall"
			
 
				 	"unsafe"
			
 
				 
			
@@ -74,16 +73,22 @@ func (hl *HipLib) Release() {
 
				 	hl.dll = 0
			
 
				 }
			
 
				 
			
 
				-func (hl *HipLib) AMDDriverVersion() (string, error) {
			
 
				+func (hl *HipLib) AMDDriverVersion() (driverMajor, driverMinor int, err error) {
			
 
				 	if hl.dll == 0 {
			
 
				-		return "", fmt.Errorf("dll has been unloaded")
			
 
				+		return 0, 0, fmt.Errorf("dll has been unloaded")
			
 
				 	}
			
 
				 	var version int
			
 
				 	status, _, err := syscall.SyscallN(hl.hipDriverGetVersion, uintptr(unsafe.Pointer(&version)))
			
 
				 	if status != hipSuccess {
			
 
				-		return "", fmt.Errorf("failed call to hipDriverGetVersion: %d %s", status, err)
			
 
				+		return 0, 0, fmt.Errorf("failed call to hipDriverGetVersion: %d %s", status, err)
			
 
				 	}
			
 
				-	return strconv.Itoa(version), nil
			
 
				+
			
 
				+	slog.Debug("hipDriverGetVersion", "version", version)
			
 
				+	// TODO - this isn't actually right, but the docs claim hipDriverGetVersion isn't accurate anyway...
			
 
				+	driverMajor = version / 1000
			
 
				+	driverMinor = (version - (driverMajor * 1000)) / 10
			
 
				+
			
 
				+	return driverMajor, driverMinor, nil
			
 
				 }
			
 
				 
			
 
				 func (hl *HipLib) HipGetDeviceCount() int {
			
--- a/gpu/amd_linux.go
+++ b/gpu/amd_linux.go
@@ -8,6 +8,7 @@ import (
 
				 	"log/slog"
			
 
				 	"os"
			
 
				 	"path/filepath"
			
 
				+	"regexp"
			
 
				 	"slices"
			
 
				 	"strconv"
			
 
				 	"strings"
			
@@ -41,10 +42,8 @@ func AMDGetGPUInfo() []GpuInfo {
 
				 	}
			
 
				 
			
 
				 	// Opportunistic logging of driver version to aid in troubleshooting
			
 
				-	ver, err := AMDDriverVersion()
			
 
				-	if err == nil {
			
 
				-		slog.Info("AMD Driver: " + ver)
			
 
				-	} else {
			
 
				+	driverMajor, driverMinor, err := AMDDriverVersion()
			
 
				+	if err != nil {
			
 
				 		// TODO - if we see users crash and burn with the upstreamed kernel this can be adjusted to hard-fail rocm support and fallback to CPU
			
 
				 		slog.Warn("ollama recommends running the https://www.amd.com/en/support/linux-drivers", "error", err)
			
 
				 	}
			
@@ -91,6 +90,7 @@ func AMDGetGPUInfo() []GpuInfo {
 
				 		scanner := bufio.NewScanner(fp)
			
 
				 		isCPU := false
			
 
				 		var major, minor, patch uint64
			
 
				+		var vendor, device uint64
			
 
				 		for scanner.Scan() {
			
 
				 			line := strings.TrimSpace(scanner.Text())
			
 
				 			// Note: we could also use "cpu_cores_count X" where X is greater than zero to detect CPUs
			
@@ -118,6 +118,26 @@ func AMDGetGPUInfo() []GpuInfo {
 
				 					slog.Debug("malformed int " + line)
			
 
				 					continue
			
 
				 				}
			
 
				+			} else if strings.HasPrefix(line, "vendor_id") {
			
 
				+				ver := strings.Fields(line)
			
 
				+				if len(ver) != 2 {
			
 
				+					slog.Debug("malformed vendor_id", "vendor_id", line)
			
 
				+					continue
			
 
				+				}
			
 
				+				vendor, err = strconv.ParseUint(ver[1], 10, 32)
			
 
				+				if err != nil {
			
 
				+					slog.Debug("malformed vendor_id" + line)
			
 
				+				}
			
 
				+			} else if strings.HasPrefix(line, "device_id") {
			
 
				+				ver := strings.Fields(line)
			
 
				+				if len(ver) != 2 {
			
 
				+					slog.Debug("malformed device_id", "device_id", line)
			
 
				+					continue
			
 
				+				}
			
 
				+				device, err = strconv.ParseUint(ver[1], 10, 32)
			
 
				+				if err != nil {
			
 
				+					slog.Debug("malformed device_id" + line)
			
 
				+				}
			
 
				 			}
			
 
				 
			
 
				 			// TODO - any other properties we want to extract and record?
			
@@ -140,7 +160,7 @@ func AMDGetGPUInfo() []GpuInfo {
 
				 		}
			
 
				 
			
 
				 		if int(major) < RocmComputeMin {
			
 
				-			slog.Warn(fmt.Sprintf("amdgpu too old gfx%d%d%x", major, minor, patch), "gpu", gpuID)
			
 
				+			slog.Warn(fmt.Sprintf("amdgpu too old gfx%d%x%x", major, minor, patch), "gpu", gpuID)
			
 
				 			continue
			
 
				 		}
			
 
				 
			
@@ -210,24 +230,29 @@ func AMDGetGPUInfo() []GpuInfo {
 
				 
			
 
				 		// iGPU detection, remove this check once we can support an iGPU variant of the rocm library
			
 
				 		if totalMemory < IGPUMemLimit {
			
 
				-			slog.Info("amdgpu appears to be an iGPU, skipping", "gpu", gpuID, "total", format.HumanBytes2(totalMemory))
			
 
				+			slog.Info("unsupported Radeon iGPU detected skipping", "id", gpuID, "total", format.HumanBytes2(totalMemory))
			
 
				 			continue
			
 
				 		}
			
 
				+		var name string
			
 
				+		// TODO - PCI ID lookup
			
 
				+		if vendor > 0 && device > 0 {
			
 
				+			name = fmt.Sprintf("%04x:%04x", vendor, device)
			
 
				+		}
			
 
				 
			
 
				-		slog.Info("amdgpu memory", "gpu", gpuID, "total", format.HumanBytes2(totalMemory))
			
 
				-		slog.Info("amdgpu memory", "gpu", gpuID, "available", format.HumanBytes2(totalMemory-usedMemory))
			
 
				+		slog.Debug("amdgpu memory", "gpu", gpuID, "total", format.HumanBytes2(totalMemory))
			
 
				+		slog.Debug("amdgpu memory", "gpu", gpuID, "available", format.HumanBytes2(totalMemory-usedMemory))
			
 
				 		gpuInfo := GpuInfo{
			
 
				 			Library: "rocm",
			
 
				 			memInfo: memInfo{
			
 
				 				TotalMemory: totalMemory,
			
 
				 				FreeMemory:  (totalMemory - usedMemory),
			
 
				 			},
			
 
				-			ID: fmt.Sprintf("%d", gpuID),
			
 
				-			// Name: not exposed in sysfs directly, would require pci device id lookup
			
 
				-			Major:         int(major),
			
 
				-			Minor:         int(minor),
			
 
				-			Patch:         int(patch),
			
 
				+			ID:            fmt.Sprintf("%d", gpuID),
			
 
				+			Name:          name,
			
 
				+			Compute:       fmt.Sprintf("gfx%d%x%x", major, minor, patch),
			
 
				 			MinimumMemory: rocmMinimumMemory,
			
 
				+			DriverMajor:   driverMajor,
			
 
				+			DriverMinor:   driverMinor,
			
 
				 		}
			
 
				 
			
 
				 		// If the user wants to filter to a subset of devices, filter out if we aren't a match
			
@@ -266,7 +291,7 @@ func AMDGetGPUInfo() []GpuInfo {
 
				 				}
			
 
				 				slog.Debug("rocm supported GPUs", "types", supported)
			
 
				 			}
			
 
				-			gfx := fmt.Sprintf("gfx%d%d%x", gpuInfo.Major, gpuInfo.Minor, gpuInfo.Patch)
			
 
				+			gfx := gpuInfo.Compute
			
 
				 			if !slices.Contains[[]string, string](supported, gfx) {
			
 
				 				slog.Warn("amdgpu is not supported", "gpu", gpuInfo.ID, "gpu_type", gfx, "library", libDir, "supported_types", supported)
			
 
				 				// TODO - consider discrete markdown just for ROCM troubleshooting?
			
@@ -276,7 +301,7 @@ func AMDGetGPUInfo() []GpuInfo {
 
				 				slog.Info("amdgpu is supported", "gpu", gpuInfo.ID, "gpu_type", gfx)
			
 
				 			}
			
 
				 		} else {
			
 
				-			slog.Debug("skipping rocm gfx compatibility check with HSA_OVERRIDE_GFX_VERSION=" + gfxOverride)
			
 
				+			slog.Info("skipping rocm gfx compatibility check", "HSA_OVERRIDE_GFX_VERSION", gfxOverride)
			
 
				 		}
			
 
				 
			
 
				 		// The GPU has passed all the verification steps and is supported
			
@@ -322,19 +347,34 @@ func AMDValidateLibDir() (string, error) {
 
				 	return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
			
 
				 }
			
 
				 
			
 
				-func AMDDriverVersion() (string, error) {
			
 
				-	_, err := os.Stat(DriverVersionFile)
			
 
				+func AMDDriverVersion() (driverMajor, driverMinor int, err error) {
			
 
				+	_, err = os.Stat(DriverVersionFile)
			
 
				 	if err != nil {
			
 
				-		return "", fmt.Errorf("amdgpu version file missing: %s %w", DriverVersionFile, err)
			
 
				+		return 0, 0, fmt.Errorf("amdgpu version file missing: %s %w", DriverVersionFile, err)
			
 
				 	}
			
 
				 	fp, err := os.Open(DriverVersionFile)
			
 
				 	if err != nil {
			
 
				-		return "", err
			
 
				+		return 0, 0, err
			
 
				 	}
			
 
				 	defer fp.Close()
			
 
				 	verString, err := io.ReadAll(fp)
			
 
				 	if err != nil {
			
 
				-		return "", err
			
 
				+		return 0, 0, err
			
 
				+	}
			
 
				+
			
 
				+	pattern := `\A(\d+)\.(\d+).*`
			
 
				+	regex := regexp.MustCompile(pattern)
			
 
				+	match := regex.FindStringSubmatch(string(verString))
			
 
				+	if len(match) < 2 {
			
 
				+		return 0, 0, fmt.Errorf("malformed version string %s", string(verString))
			
 
				+	}
			
 
				+	driverMajor, err = strconv.Atoi(match[1])
			
 
				+	if err != nil {
			
 
				+		return 0, 0, err
			
 
				+	}
			
 
				+	driverMinor, err = strconv.Atoi(match[2])
			
 
				+	if err != nil {
			
 
				+		return 0, 0, err
			
 
				 	}
			
 
				-	return strings.TrimSpace(string(verString)), nil
			
 
				+	return driverMajor, driverMinor, nil
			
 
				 }
			
--- a/gpu/amd_windows.go
+++ b/gpu/amd_windows.go
@@ -7,7 +7,6 @@ import (
 
				 	"os"
			
 
				 	"path/filepath"
			
 
				 	"slices"
			
 
				-	"strconv"
			
 
				 	"strings"
			
 
				 
			
 
				 	"github.com/ollama/ollama/format"
			
@@ -34,13 +33,12 @@ func AMDGetGPUInfo() []GpuInfo {
 
				 	}
			
 
				 	defer hl.Release()
			
 
				 
			
 
				-	ver, err := hl.AMDDriverVersion()
			
 
				-	if err == nil {
			
 
				-		slog.Info("AMD Driver: " + ver)
			
 
				-	} else {
			
 
				-		// For now this is benign, but we may eventually need to fail compatibility checks
			
 
				-		slog.Debug("error looking up amd driver version", "error", err)
			
 
				-	}
			
 
				+	// TODO - this reports incorrect version information, so omitting for now
			
 
				+	// driverMajor, driverMinor, err := hl.AMDDriverVersion()
			
 
				+	// if err != nil {
			
 
				+	// 	// For now this is benign, but we may eventually need to fail compatibility checks
			
 
				+	// 	slog.Debug("error looking up amd driver version", "error", err)
			
 
				+	// }
			
 
				 
			
 
				 	// Note: the HIP library automatically handles subsetting to any HIP_VISIBLE_DEVICES the user specified
			
 
				 	count := hl.HipGetDeviceCount()
			
@@ -62,10 +60,10 @@ func AMDGetGPUInfo() []GpuInfo {
 
				 			return nil
			
 
				 		}
			
 
				 	} else {
			
 
				-		slog.Debug("skipping rocm gfx compatibility check with HSA_OVERRIDE_GFX_VERSION=" + gfxOverride)
			
 
				+		slog.Info("skipping rocm gfx compatibility check", "HSA_OVERRIDE_GFX_VERSION", gfxOverride)
			
 
				 	}
			
 
				 
			
 
				-	slog.Info("detected hip devices", "count", count)
			
 
				+	slog.Debug("detected hip devices", "count", count)
			
 
				 	// TODO how to determine the underlying device ID when visible devices is causing this to subset?
			
 
				 	for i := 0; i < count; i++ {
			
 
				 		err = hl.HipSetDevice(i)
			
@@ -85,18 +83,11 @@ func AMDGetGPUInfo() []GpuInfo {
 
				 		// Can luid be used on windows for setting visible devices (and is it actually set?)
			
 
				 		n = bytes.IndexByte(props.GcnArchName[:], 0)
			
 
				 		gfx := string(props.GcnArchName[:n])
			
 
				-		slog.Info("hip device", "id", i, "name", name, "gfx", gfx)
			
 
				-		var major, minor, patch string
			
 
				-		switch len(gfx) {
			
 
				-		case 6:
			
 
				-			major, minor, patch = gfx[3:4], gfx[4:5], gfx[5:]
			
 
				-		case 7:
			
 
				-			major, minor, patch = gfx[3:5], gfx[5:6], gfx[6:]
			
 
				-		}
			
 
				+		slog.Debug("hip device", "id", i, "name", name, "gfx", gfx)
			
 
				 		//slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY!  Always 0
			
 
				 		// TODO  Why isn't props.iGPU accurate!?
			
 
				 		if strings.EqualFold(name, iGPUName) {
			
 
				-			slog.Info("iGPU detected skipping", "id", i)
			
 
				+			slog.Info("unsupported Radeon iGPU detected skipping", "id", i, "name", name, "gfx", gfx)
			
 
				 			continue
			
 
				 		}
			
 
				 		if gfxOverride == "" {
			
@@ -106,7 +97,7 @@ func AMDGetGPUInfo() []GpuInfo {
 
				 				slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for HSA_OVERRIDE_GFX_VERSION usage")
			
 
				 				continue
			
 
				 			} else {
			
 
				-				slog.Info("amdgpu is supported", "gpu", i, "gpu_type", gfx)
			
 
				+				slog.Debug("amdgpu is supported", "gpu", i, "gpu_type", gfx)
			
 
				 			}
			
 
				 		}
			
 
				 
			
@@ -124,8 +115,8 @@ func AMDGetGPUInfo() []GpuInfo {
 
				 
			
 
				 		// TODO revisit this once ROCm v6 is available on windows.
			
 
				 		// v5.7 only reports VRAM used by this process, so it's completely wrong and unusable
			
 
				-		slog.Info("amdgpu memory", "gpu", i, "total", format.HumanBytes2(totalMemory))
			
 
				-		slog.Info("amdgpu memory", "gpu", i, "available", format.HumanBytes2(freeMemory))
			
 
				+		slog.Debug("amdgpu memory", "gpu", i, "total", format.HumanBytes2(totalMemory))
			
 
				+		slog.Debug("amdgpu memory", "gpu", i, "available", format.HumanBytes2(freeMemory))
			
 
				 		gpuInfo := GpuInfo{
			
 
				 			Library: "rocm",
			
 
				 			memInfo: memInfo{
			
@@ -135,31 +126,12 @@ func AMDGetGPUInfo() []GpuInfo {
 
				 			ID:             fmt.Sprintf("%d", i), // TODO this is probably wrong if we specify visible devices
			
 
				 			DependencyPath: libDir,
			
 
				 			MinimumMemory:  rocmMinimumMemory,
			
 
				-		}
			
 
				-		if major != "" {
			
 
				-			gpuInfo.Major, err = strconv.Atoi(major)
			
 
				-			if err != nil {
			
 
				-				slog.Info("failed to parse version", "version", gfx, "error", err)
			
 
				-			}
			
 
				-		}
			
 
				-		if minor != "" {
			
 
				-			gpuInfo.Minor, err = strconv.Atoi(minor)
			
 
				-			if err != nil {
			
 
				-				slog.Info("failed to parse version", "version", gfx, "error", err)
			
 
				-			}
			
 
				-		}
			
 
				-		if patch != "" {
			
 
				-			// Patch rev is hex; e.g. gfx90a
			
 
				-			p, err := strconv.ParseInt(patch, 16, 0)
			
 
				-			if err != nil {
			
 
				-				slog.Info("failed to parse version", "version", gfx, "error", err)
			
 
				-			} else {
			
 
				-				gpuInfo.Patch = int(p)
			
 
				-			}
			
 
				-		}
			
 
				-		if gpuInfo.Major < RocmComputeMin {
			
 
				-			slog.Warn(fmt.Sprintf("amdgpu [%s] too old gfx%d%d%x", gpuInfo.ID, gpuInfo.Major, gpuInfo.Minor, gpuInfo.Patch))
			
 
				-			continue
			
 
				+			Name:           name,
			
 
				+			Compute:        gfx,
			
 
				+
			
 
				+			// TODO - this information isn't accurate on windows, so don't report it until we find the right way to retrieve
			
 
				+			// DriverMajor:    driverMajor,
			
 
				+			// DriverMinor:    driverMinor,
			
 
				 		}
			
 
				 
			
 
				 		resp = append(resp, gpuInfo)
			
--- a/gpu/gpu.go
+++ b/gpu/gpu.go
@@ -119,12 +119,12 @@ func initGPUHandles() *handles {
 
				 		return gpuHandles
			
 
				 	}
			
 
				 
			
 
				-	slog.Info("Detecting GPUs")
			
 
				+	slog.Debug("Detecting GPUs")
			
 
				 	nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
			
 
				 	if len(nvcudaLibPaths) > 0 {
			
 
				 		deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
			
 
				 		if nvcuda != nil {
			
 
				-			slog.Info("detected GPUs", "count", deviceCount, "library", libPath)
			
 
				+			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
			
 
				 			gpuHandles.nvcuda = nvcuda
			
 
				 			gpuHandles.deviceCount = deviceCount
			
 
				 			return gpuHandles
			
@@ -135,7 +135,7 @@ func initGPUHandles() *handles {
 
				 	if len(cudartLibPaths) > 0 {
			
 
				 		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
			
 
				 		if cudart != nil {
			
 
				-			slog.Info("detected GPUs", "library", libPath, "count", deviceCount)
			
 
				+			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
			
 
				 			gpuHandles.cudart = cudart
			
 
				 			gpuHandles.deviceCount = deviceCount
			
 
				 			return gpuHandles
			
@@ -184,10 +184,14 @@ func GetGPUInfo() GpuInfoList {
 
				 		gpuInfo := GpuInfo{
			
 
				 			Library: "cuda",
			
 
				 		}
			
 
				+		var driverMajor int
			
 
				+		var driverMinor int
			
 
				 		if gpuHandles.cudart != nil {
			
 
				 			C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo)
			
 
				 		} else {
			
 
				 			C.nvcuda_check_vram(*gpuHandles.nvcuda, C.int(i), &memInfo)
			
 
				+			driverMajor = int(gpuHandles.nvcuda.driver_major)
			
 
				+			driverMinor = int(gpuHandles.nvcuda.driver_minor)
			
 
				 		}
			
 
				 		if memInfo.err != nil {
			
 
				 			slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
			
@@ -201,10 +205,12 @@ func GetGPUInfo() GpuInfoList {
 
				 		gpuInfo.TotalMemory = uint64(memInfo.total)
			
 
				 		gpuInfo.FreeMemory = uint64(memInfo.free)
			
 
				 		gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
			
 
				-		gpuInfo.Major = int(memInfo.major)
			
 
				-		gpuInfo.Minor = int(memInfo.minor)
			
 
				+		gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
			
 
				 		gpuInfo.MinimumMemory = cudaMinimumMemory
			
 
				 		gpuInfo.DependencyPath = depPath
			
 
				+		gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
			
 
				+		gpuInfo.DriverMajor = int(driverMajor)
			
 
				+		gpuInfo.DriverMinor = int(driverMinor)
			
 
				 
			
 
				 		// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
			
 
				 		resp = append(resp, gpuInfo)
			
--- a/gpu/gpu_info.h
+++ b/gpu/gpu_info.h
@@ -39,16 +39,19 @@ extern "C" {
 
				 #endif
			
 
				 
			
 
				 #define GPU_ID_LEN 64
			
 
				+#define GPU_NAME_LEN 96
			
 
				 
			
 
				 typedef struct mem_info {
			
 
				   char *err;  // If non-NULL, caller responsible for freeing
			
 
				   char gpu_id[GPU_ID_LEN];
			
 
				+  char gpu_name[GPU_NAME_LEN];
			
 
				   uint64_t total;
			
 
				   uint64_t free;
			
 
				 
			
 
				   // Compute Capability
			
 
				   int major; 
			
 
				   int minor;
			
 
				+  int patch;
			
 
				 } mem_info_t;
			
 
				 
			
 
				 void cpu_check_ram(mem_info_t *resp);
			
--- a/gpu/gpu_info_cpu.c
+++ b/gpu/gpu_info_cpu.c
@@ -10,8 +10,6 @@ void cpu_check_ram(mem_info_t *resp) {
 
				   if (GlobalMemoryStatusEx(&info) != 0) {
			
 
				     resp->total = info.ullTotalPhys;
			
 
				     resp->free = info.ullAvailPhys;
			
 
				-    resp->major = 0;
			
 
				-    resp->minor = 0;
			
 
				     snprintf(&resp->gpu_id[0], GPU_ID_LEN, "0");
			
 
				   } else {
			
 
				     resp->err = LOAD_ERR();
			
@@ -31,8 +29,6 @@ void cpu_check_ram(mem_info_t *resp) {
 
				   } else {
			
 
				     resp->total = info.totalram * info.mem_unit;
			
 
				     resp->free = info.freeram * info.mem_unit;
			
 
				-    resp->major = 0;
			
 
				-    resp->minor = 0;
			
 
				     snprintf(&resp->gpu_id[0], GPU_ID_LEN, "0");
			
 
				   }
			
 
				   return;
			
--- a/gpu/gpu_info_nvcuda.c
+++ b/gpu/gpu_info_nvcuda.c
@@ -22,6 +22,7 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
 
				       {"cuDeviceGet", (void *)&resp->ch.cuDeviceGet},
			
 
				       {"cuDeviceGetAttribute", (void *)&resp->ch.cuDeviceGetAttribute},
			
 
				       {"cuDeviceGetUuid", (void *)&resp->ch.cuDeviceGetUuid},
			
 
				+      {"cuDeviceGetName", (void *)&resp->ch.cuDeviceGetName},
			
 
				       {"cuCtxCreate_v3", (void *)&resp->ch.cuCtxCreate_v3},
			
 
				       {"cuMemGetInfo_v2", (void *)&resp->ch.cuMemGetInfo_v2},
			
 
				       {"cuCtxDestroy", (void *)&resp->ch.cuCtxDestroy},
			
@@ -70,18 +71,17 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
 
				   }
			
 
				 
			
 
				   int version = 0;
			
 
				-  nvcudaDriverVersion_t driverVersion;
			
 
				-  driverVersion.major = 0;
			
 
				-  driverVersion.minor = 0;
			
 
				+  resp->ch.driver_major = 0;
			
 
				+  resp->ch.driver_minor = 0;
			
 
				 
			
 
				   // Report driver version if we're in verbose mode, ignore errors
			
 
				   ret = (*resp->ch.cuDriverGetVersion)(&version);
			
 
				   if (ret != CUDA_SUCCESS) {
			
 
				     LOG(resp->ch.verbose, "cuDriverGetVersion failed: %d\n", ret);
			
 
				   } else {
			
 
				-    driverVersion.major = version / 1000;
			
 
				-    driverVersion.minor = (version - (driverVersion.major * 1000)) / 10;
			
 
				-    LOG(resp->ch.verbose, "CUDA driver version: %d-%d\n", driverVersion.major, driverVersion.minor);
			
 
				+    resp->ch.driver_major = version / 1000;
			
 
				+    resp->ch.driver_minor = (version - (resp->ch.driver_major * 1000)) / 10;
			
 
				+    LOG(resp->ch.verbose, "CUDA driver version: %d.%d\n", resp->ch.driver_major, resp->ch.driver_minor);
			
 
				   }
			
 
				 
			
 
				   ret = (*resp->ch.cuDeviceGetCount)(&resp->num_devices);
			
@@ -117,8 +117,6 @@ void nvcuda_check_vram(nvcuda_handle_t h, int i, mem_info_t *resp) {
 
				     return;
			
 
				   }
			
 
				 
			
 
				-  resp->major = 0;
			
 
				-  resp->minor = 0;
			
 
				   int major = 0;
			
 
				   int minor = 0;
			
 
				   ret = (*h.cuDeviceGetAttribute)(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device);
			
@@ -161,6 +159,12 @@ void nvcuda_check_vram(nvcuda_handle_t h, int i, mem_info_t *resp) {
 
				       );
			
 
				   }
			
 
				 
			
 
				+  ret = (*h.cuDeviceGetName)(&resp->gpu_name[0], GPU_NAME_LEN, device);
			
 
				+  if (ret != CUDA_SUCCESS) {
			
 
				+    LOG(h.verbose, "[%d] device name lookup failure: %d\n", i, ret);
			
 
				+    resp->gpu_name[0] = '\0';
			
 
				+  }
			
 
				+
			
 
				   // To get memory we have to set (and release) a context
			
 
				   ret = (*h.cuCtxCreate_v3)(&ctx, NULL, 0, 0, device);
			
 
				   if (ret != CUDA_SUCCESS) {
			
--- a/gpu/gpu_info_nvcuda.h
+++ b/gpu/gpu_info_nvcuda.h
@@ -44,12 +44,15 @@ typedef void* CUcontext;
 
				 typedef struct nvcuda_handle {
			
 
				   void *handle;
			
 
				   uint16_t verbose;
			
 
				+  int driver_major;
			
 
				+  int driver_minor;
			
 
				   CUresult (*cuInit)(unsigned int Flags);
			
 
				   CUresult (*cuDriverGetVersion)(int *driverVersion);
			
 
				   CUresult (*cuDeviceGetCount)(int *);
			
 
				   CUresult (*cuDeviceGet)(CUdevice* device, int ordinal);
			
 
				   CUresult (*cuDeviceGetAttribute)(int* pi, CUdevice_attribute attrib, CUdevice dev);
			
 
				   CUresult (*cuDeviceGetUuid)(CUuuid* uuid, CUdevice dev); // signature compatible with cuDeviceGetUuid_v2
			
 
				+  CUresult (*cuDeviceGetName)(char *name, int len, CUdevice dev);
			
 
				 
			
 
				   // Context specific aspects
			
 
				   CUresult (*cuCtxCreate_v3)(CUcontext* pctx, void *params, int len, unsigned int flags, CUdevice dev);
			
--- a/gpu/types.go
+++ b/gpu/types.go
@@ -1,5 +1,12 @@
 
				 package gpu
			
 
				 
			
 
				+import (
			
 
				+	"fmt"
			
 
				+	"log/slog"
			
 
				+
			
 
				+	"github.com/ollama/ollama/format"
			
 
				+)
			
 
				+
			
 
				 type memInfo struct {
			
 
				 	TotalMemory uint64 `json:"total_memory,omitempty"`
			
 
				 	FreeMemory  uint64 `json:"free_memory,omitempty"`
			
@@ -20,11 +27,13 @@ type GpuInfo struct {
 
				 	DependencyPath string `json:"lib_path,omitempty"`
			
 
				 
			
 
				 	// GPU information
			
 
				-	ID    string `json:"gpu_id"`          // string to use for selection of this specific GPU
			
 
				-	Name  string `json:"name"`            // user friendly name if available
			
 
				-	Major int    `json:"major,omitempty"` // Major compatibility version (CC or gfx)
			
 
				-	Minor int    `json:"minor,omitempty"` // Minor compatibility version (CC or gfx)
			
 
				-	Patch int    `json:"patch,omitempty"` // Patch compatibility only matters on AMD
			
 
				+	ID      string `json:"gpu_id"`  // string to use for selection of this specific GPU
			
 
				+	Name    string `json:"name"`    // user friendly name if available
			
 
				+	Compute string `json:"compute"` // Compute Capability or gfx
			
 
				+
			
 
				+	// Driver Information - TODO no need to put this on each GPU
			
 
				+	DriverMajor int `json:"driver_major,omitempty"`
			
 
				+	DriverMinor int `json:"driver_minor,omitempty"`
			
 
				 
			
 
				 	// TODO other performance capability info to help in scheduling decisions
			
 
				 }
			
@@ -56,6 +65,21 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList {
 
				 	return resp
			
 
				 }
			
 
				 
			
 
				+// LogDetails reports the GPU information to the log at Info level
			
 
				+func (l GpuInfoList) LogDetails() {
			
 
				+	for _, g := range l {
			
 
				+		slog.Info("inference compute",
			
 
				+			"id", g.ID,
			
 
				+			"library", g.Library,
			
 
				+			"compute", g.Compute,
			
 
				+			"driver", fmt.Sprintf("%d.%d", g.DriverMajor, g.DriverMinor),
			
 
				+			"name", g.Name,
			
 
				+			"total", format.HumanBytes2(g.TotalMemory),
			
 
				+			"available", format.HumanBytes2(g.FreeMemory),
			
 
				+		)
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 // Sort by Free Space
			
 
				 type ByFreeMemory []GpuInfo
			
 
				 
			
--- a/server/routes.go
+++ b/server/routes.go
@@ -1065,7 +1065,8 @@ func Serve(ln net.Listener) error {
 
				 
			
 
				 	// At startup we retrieve GPU information so we can get log messages before loading a model
			
 
				 	// This will log warnings to the log in case we have problems with detected GPUs
			
 
				-	_ = gpu.GetGPUInfo()
			
 
				+	gpus := gpu.GetGPUInfo()
			
 
				+	gpus.LogDetails()
			
 
				 
			
 
				 	return srvr.Serve(ln)
			
 
				 }