1 year ago · d74ce6bd4f
--- a/gpu/gpu.go
+++ b/gpu/gpu.go
@@ -28,6 +28,9 @@ type handles struct {
 
				 var gpuMutex sync.Mutex
			
 
				 var gpuHandles *handles = nil
			
 
				 
			
 
				+// CudaComputeMajorMin is the lowest CUDA compute capability major version for which GetGPUInfo selects the "cuda" library. TODO: verify this is the correct minimum.
			
 
				+const CudaComputeMajorMin = 5
			
 
				+
			
 
				 // Note: gpuMutex must already be held
			
 
				 func initGPUHandles() {
			
 
				 	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
			
@@ -73,7 +76,18 @@ func GetGPUInfo() GpuInfo {
 
				 			log.Printf("error looking up CUDA GPU memory: %s", C.GoString(memInfo.err))
			
 
				 			C.free(unsafe.Pointer(memInfo.err))
			
 
				 		} else {
			
 
				-			resp.Library = "cuda"
			
 
				+			// Verify minimum compute capability
			
 
				+			var cc C.cuda_compute_capability_t
			
 
				+			C.cuda_compute_capability(*gpuHandles.cuda, &cc)
			
 
				+			if cc.err != nil {
			
 
				+				log.Printf("error looking up CUDA GPU compute capability: %s", C.GoString(cc.err))
			
 
				+				C.free(unsafe.Pointer(cc.err))
			
 
				+			} else if cc.major >= CudaComputeMajorMin {
			
 
				+				log.Printf("CUDA Compute Capability detected: %d.%d", cc.major, cc.minor)
			
 
				+				resp.Library = "cuda"
			
 
				+			} else {
			
 
				+				log.Printf("CUDA GPU is too old. Falling back to CPU mode. Compute Capability detected: %d.%d", cc.major, cc.minor)
			
 
				+			}
			
 
				 		}
			
 
				 	} else if gpuHandles.rocm != nil {
			
 
				 		C.rocm_check_vram(*gpuHandles.rocm, &memInfo)
			
--- a/gpu/gpu_info_cuda.c
+++ b/gpu/gpu_info_cuda.c
@@ -21,7 +21,7 @@ const char *cuda_lib_paths[] = {
 
				 };
			
 
				 #endif
			
 
				 
			
 
				-#define CUDA_LOOKUP_SIZE 5
			
 
				+#define CUDA_LOOKUP_SIZE 6  // NOTE(review): presumably the entry count of the NVML symbol lookup table in cuda_init; confirm it matches that table
			
 
				 
			
 
				 void cuda_init(cuda_init_resp_t *resp) {
			
 
				   nvmlReturn_t ret;
			
@@ -39,6 +39,7 @@ void cuda_init(cuda_init_resp_t *resp) {
 
				       {"nvmlDeviceGetHandleByIndex", (void *)&resp->ch.getHandle},
			
 
				       {"nvmlDeviceGetMemoryInfo", (void *)&resp->ch.getMemInfo},
			
 
				       {"nvmlDeviceGetCount_v2", (void *)&resp->ch.getCount},
			
 
				+      {"nvmlDeviceGetCudaComputeCapability", (void *)&resp->ch.getComputeCapability},
			
 
				   };
			
 
				 
			
 
				   for (i = 0; cuda_lib_paths[i] != NULL && resp->ch.handle == NULL; i++) {
			
@@ -123,4 +124,69 @@ void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
				     resp->free += memInfo.free;
				   }
				 }
				+
				+// cuda_compute_capability reports the lowest CUDA compute capability
				+// (major.minor) across every device NVML enumerates, because the weakest
				+// device limits compatibility.
				+//
				+// On success resp->err is NULL; major and minor stay 0 if NVML reports no
				+// devices. On failure resp->err is a strdup'd message that the caller must
				+// free, and major and minor are left at 0.
				+void cuda_compute_capability(cuda_handle_t h, cuda_compute_capability_t *resp) {
				+  nvmlDevice_t device;
				+  nvmlReturn_t ret;
				+  unsigned int devices;
				+  unsigned int i;  // unsigned to match the NVML device count and index type
				+  int lowMajor = 0;
				+  int lowMinor = 0;
				+  char buf[256];  // fixed-size scratch buffer for error messages
				+
				+  resp->err = NULL;
				+  resp->major = 0;
				+  resp->minor = 0;
				+
				+  if (h.handle == NULL) {
				+    resp->err = strdup("nvml handle not initialized");
				+    return;
				+  }
				+
				+  ret = (*h.getCount)(&devices);
				+  if (ret != NVML_SUCCESS) {
				+    snprintf(buf, sizeof(buf), "unable to get device count: %d", ret);
				+    resp->err = strdup(buf);
				+    return;
				+  }
				+
				+  for (i = 0; i < devices; i++) {
				+    int major = 0;
				+    int minor = 0;
				+
				+    ret = (*h.getHandle)(i, &device);
				+    if (ret != NVML_SUCCESS) {
				+      snprintf(buf, sizeof(buf), "unable to get device handle %u: %d", i, ret);
				+      resp->err = strdup(buf);
				+      return;
				+    }
				+
				+    ret = (*h.getComputeCapability)(device, &major, &minor);
				+    if (ret != NVML_SUCCESS) {
				+      snprintf(buf, sizeof(buf), "device compute capability lookup failure %u: %d", i, ret);
				+      resp->err = strdup(buf);
				+      return;
				+    }
				+
				+    // Keep the lexicographically smallest major.minor seen so far; the first
				+    // device always seeds the running minimum.
				+    if (i == 0 || major < lowMajor ||
				+        (major == lowMajor && minor < lowMinor)) {
				+      lowMajor = major;
				+      lowMinor = minor;
				+    }
				+  }
				+
				+  // Publish only after every device was queried, so an error path never
				+  // leaves a partial minimum in resp.
				+  resp->major = lowMajor;
				+  resp->minor = lowMinor;
				+}
				 #endif  // __APPLE__
--- a/gpu/gpu_info_cuda.h
+++ b/gpu/gpu_info_cuda.h
@@ -22,6 +22,7 @@ typedef struct cuda_handle {
 
				   nvmlReturn_t (*getHandle)(unsigned int, nvmlDevice_t *);
			
 
				   nvmlReturn_t (*getMemInfo)(nvmlDevice_t, nvmlMemory_t *);
			
 
				   nvmlReturn_t (*getCount)(unsigned int *);
			
 
				+  nvmlReturn_t (*getComputeCapability)(nvmlDevice_t, int* major, int* minor);
			
 
				 } cuda_handle_t;
			
 
				 
			
 
				 typedef struct cuda_init_resp {
			
@@ -29,8 +30,15 @@ typedef struct cuda_init_resp {
 
				   cuda_handle_t ch;
			
 
				 } cuda_init_resp_t;
			
 
				 
			
 
				+typedef struct cuda_compute_capability {  // result filled in by cuda_compute_capability()
			
 
				+  char *err;  // NULL on success; otherwise a strdup'd message the caller must free
			
 
				+  int major;  // major version of the lowest compute capability found across devices
			
 
				+  int minor;  // minor version paired with that lowest major version
			
 
				+} cuda_compute_capability_t;
			
 
				+
			
 
				 void cuda_init(cuda_init_resp_t *resp);
			
 
				 void cuda_check_vram(cuda_handle_t ch, mem_info_t *resp);
			
 
				+void cuda_compute_capability(cuda_handle_t ch, cuda_compute_capability_t *cc);
			
 
				 
			
 
				 #endif  // __GPU_INFO_CUDA_H__
			
 
				 #endif  // __APPLE__