Release gpu discovery library after use

Leaving the cudart library loaded kept ~30 MB of memory
pinned on the GPU in the main process. This change ensures
we don't hold GPU resources when idle (the new handle
lifetime is sketched below the changed-file list).
Daniel Hiltgen, commit 526d4eb204
5 changed files with 31 additions and 10 deletions
  1. gpu/gpu.go (+16, -10)
  2. gpu/gpu_info_cudart.c (+6, -0)
  3. gpu/gpu_info_cudart.h (+1, -0)
  4. gpu/gpu_info_nvml.c (+7, -0)
  5. gpu/gpu_info_nvml.h (+1, -0)
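
Before this commit, initGPUHandles() stashed the loaded libraries in the
package-level gpuHandles and never released them, so the cudart handle
(and its GPU allocation) stayed pinned for the life of the process. Now
the function returns the handles to its caller, and GetGPUInfo releases
them in a deferred closure. A minimal sketch of the new lifetime, with
hypothetical stand-in types in place of the cgo handles:

package main

import "fmt"

// Hypothetical stand-ins for the cgo-backed *C.nvml_handle_t and
// *C.cudart_handle_t used by gpu/gpu.go.
type nvmlHandle struct{}
type cudartHandle struct{}

type handles struct {
	nvml   *nvmlHandle
	cudart *cudartHandle
}

// initGPUHandles now returns fresh handles instead of caching them
// in a package-level variable.
func initGPUHandles() *handles {
	// Library probing elided; pretend cudart was found.
	return &handles{cudart: &cudartHandle{}}
}

func GetGPUInfo() {
	gpuHandles := initGPUHandles()
	defer func() {
		// Mirrors the C.nvml_release / C.cudart_release calls in
		// the diff; runs on every return path out of GetGPUInfo.
		if gpuHandles.nvml != nil {
			fmt.Println("nvml released")
		}
		if gpuHandles.cudart != nil {
			fmt.Println("cudart released")
		}
	}()
	// ... VRAM and compute-capability probing would happen here ...
}

func main() { GetGPUInfo() }

Because the handles are now function-local, idle periods no longer keep
cudart resident in the main process.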

gpu/gpu.go (+16, -10)

@@ -35,7 +35,6 @@ const (
 )
 
 var gpuMutex sync.Mutex
-var gpuHandles *handles = nil
 
 // With our current CUDA compile flags, older than 5.0 will not work properly
 var CudaComputeMin = [2]C.int{5, 0}
@@ -85,11 +84,11 @@ var CudartWindowsGlobs = []string{
 var CudaTegra string = os.Getenv("JETSON_JETPACK")
 
 // Note: gpuMutex must already be held
-func initGPUHandles() {
+func initGPUHandles() *handles {
 
 	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
 
-	gpuHandles = &handles{nil, nil}
+	gpuHandles := &handles{nil, nil}
 	var nvmlMgmtName string
 	var nvmlMgmtPatterns []string
 	var cudartMgmtName string
@@ -116,7 +115,7 @@ func initGPUHandles() {
 		}
 		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
 	default:
-		return
+		return gpuHandles
 	}
 
 	slog.Info("Detecting GPU type")
@@ -126,7 +125,7 @@ func initGPUHandles() {
 		if cudart != nil {
 			slog.Info("Nvidia GPU detected via cudart")
 			gpuHandles.cudart = cudart
-			return
+			return gpuHandles
 		}
 	}
 
@@ -137,10 +136,10 @@ func initGPUHandles() {
 		if nvml != nil {
 			slog.Info("Nvidia GPU detected via nvidia-ml")
 			gpuHandles.nvml = nvml
-			return
+			return gpuHandles
 		}
 	}
-
+	return gpuHandles
 }
 
 func GetGPUInfo() GpuInfo {
@@ -148,9 +147,16 @@ func GetGPUInfo() GpuInfo {
 	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
 	gpuMutex.Lock()
 	defer gpuMutex.Unlock()
-	if gpuHandles == nil {
-		initGPUHandles()
-	}
+
+	gpuHandles := initGPUHandles()
+	defer func() {
+		if gpuHandles.nvml != nil {
+			C.nvml_release(*gpuHandles.nvml)
+		}
+		if gpuHandles.cudart != nil {
+			C.cudart_release(*gpuHandles.cudart)
+		}
+	}()
 
 	// All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX
 	cpuVariant := GetCPUVariant()
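
The deferred closure above fires on every return path out of GetGPUInfo,
so even early exits later in the function unload the libraries. Both nil
checks are needed: depending on which library was found, initGPUHandles
returns with neither handle populated, or exactly one of the two.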

gpu/gpu_info_cudart.c (+6, -0)

@@ -191,4 +191,10 @@ void cudart_compute_capability(cudart_handle_t h, cudart_compute_capability_t *r
   }
 }
 
+void cudart_release(cudart_handle_t h) {
+  LOG(h.verbose, "releasing cudart library\n");
+  UNLOAD_LIBRARY(h.handle);
+  h.handle = NULL;
+}
+
 #endif  // __APPLE__
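
Two details in cudart_release, which also apply to the identical
nvml_release below: UNLOAD_LIBRARY is presumably the project's
portability macro over dlclose/FreeLibrary, and cudart_handle_t is
passed by value, so the h.handle = NULL assignment only clears the
callee's local copy; the caller's struct is untouched, and the real
teardown is the unload itself.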

gpu/gpu_info_cudart.h (+1, -0)

@@ -55,6 +55,7 @@ typedef struct cudart_compute_capability {
 void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp);
 void cudart_check_vram(cudart_handle_t ch, mem_info_t *resp);
 void cudart_compute_capability(cudart_handle_t th, cudart_compute_capability_t *cc);
+void cudart_release(cudart_handle_t ch);
 
 #endif  // __GPU_INFO_CUDART_H__
 #endif  // __APPLE__

gpu/gpu_info_nvml.c (+7, -0)

@@ -211,4 +211,11 @@ void nvml_compute_capability(nvml_handle_t h, nvml_compute_capability_t *resp) {
     }
   }
 }
+
+void nvml_release(nvml_handle_t h) {
+  LOG(h.verbose, "releasing nvml library\n");
+  UNLOAD_LIBRARY(h.handle);
+  h.handle = NULL;
+}
+
 #endif  // __APPLE__

gpu/gpu_info_nvml.h (+1, -0)

@@ -51,6 +51,7 @@ typedef struct nvml_compute_capability {
 void nvml_init(char *nvml_lib_path, nvml_init_resp_t *resp);
 void nvml_check_vram(nvml_handle_t ch, mem_info_t *resp);
 void nvml_compute_capability(nvml_handle_t ch, nvml_compute_capability_t *cc);
+void nvml_release(nvml_handle_t ch);
 
 #endif  // __GPU_INFO_NVML_H__
 #endif  // __APPLE__