فهرست منبع

Merge pull request #4067 from dhiltgen/cudart

Add CUDA Driver API for GPU discovery
Daniel Hiltgen 1 سال پیش
والد
کامیت
06093fd396
5 فایل تغییر یافته به همراه 342 افزوده شده و 5 حذف شده
  1. 64 2
      gpu/gpu.go
  2. 1 0
      gpu/gpu_info.h
  3. 3 3
      gpu/gpu_info_cudart.h
  4. 203 0
      gpu/gpu_info_nvcuda.c
  5. 71 0
      gpu/gpu_info_nvcuda.h

+ 64 - 2
gpu/gpu.go

@@ -27,6 +27,7 @@ import (
// handles tracks the GPU management libraries loaded for discovery.
// initGPUHandles returns early once nvcuda loads, so at most one of
// cudart/nvcuda is non-nil at a time.
type handles struct {
	deviceCount int                  // number of GPUs reported by the loaded library
	cudart      *C.cudart_handle_t   // CUDA runtime API handle (fallback)
	nvcuda      *C.nvcuda_handle_t   // CUDA driver API handle (preferred)
}
 
 const (
@@ -63,6 +64,22 @@ var CudartWindowsGlobs = []string{
 	"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
 }
 
// NvcudaLinuxGlobs lists candidate glob patterns for the CUDA driver
// library (libcuda.so) on Linux, covering toolkit installs, distro
// library paths, and WSL driver passthrough locations.
var NvcudaLinuxGlobs = []string{
	"/usr/local/cuda*/targets/*/lib/libcuda.so*",
	"/usr/lib/*-linux-gnu/nvidia/current/libcuda.so*",
	"/usr/lib/*-linux-gnu/libcuda.so*",
	"/usr/lib/wsl/lib/libcuda.so*",
	"/usr/lib/wsl/drivers/*/libcuda.so*",
	"/opt/cuda/lib*/libcuda.so*",
	"/usr/local/cuda/lib*/libcuda.so*",
	"/usr/lib*/libcuda.so*",
	"/usr/local/lib*/libcuda.so*",
}

// NvcudaWindowsGlobs lists candidate locations for the CUDA driver DLL
// (nvcuda.dll) installed by the Windows NVIDIA driver.
var NvcudaWindowsGlobs = []string{
	"c:\\windows\\system*\\nvcuda.dll",
}
+
 // Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
 // Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
 var CudaTegra string = os.Getenv("JETSON_JETPACK")
@@ -75,6 +92,8 @@ func initGPUHandles() *handles {
 	gpuHandles := &handles{}
 	var cudartMgmtName string
 	var cudartMgmtPatterns []string
+	var nvcudaMgmtName string
+	var nvcudaMgmtPatterns []string
 
 	tmpDir, _ := PayloadsDir()
 	switch runtime.GOOS {
@@ -83,6 +102,9 @@ func initGPUHandles() *handles {
 		localAppData := os.Getenv("LOCALAPPDATA")
 		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", cudartMgmtName)}
 		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartWindowsGlobs...)
+		// Aligned with driver, we can't carry as payloads
+		nvcudaMgmtName = "nvcuda.dll"
+		nvcudaMgmtPatterns = NvcudaWindowsGlobs
 	case "linux":
 		cudartMgmtName = "libcudart.so*"
 		if tmpDir != "" {
@@ -90,11 +112,25 @@ func initGPUHandles() *handles {
 			cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", cudartMgmtName)}
 		}
 		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
+		// Aligned with driver, we can't carry as payloads
+		nvcudaMgmtName = "libcuda.so*"
+		nvcudaMgmtPatterns = NvcudaLinuxGlobs
 	default:
 		return gpuHandles
 	}
 
 	slog.Info("Detecting GPUs")
+	nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
+	if len(nvcudaLibPaths) > 0 {
+		deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
+		if nvcuda != nil {
+			slog.Info("detected GPUs", "count", deviceCount, "library", libPath)
+			gpuHandles.nvcuda = nvcuda
+			gpuHandles.deviceCount = deviceCount
+			return gpuHandles
+		}
+	}
+
 	cudartLibPaths := FindGPULibs(cudartMgmtName, cudartMgmtPatterns)
 	if len(cudartLibPaths) > 0 {
 		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
@@ -119,6 +155,9 @@ func GetGPUInfo() GpuInfoList {
 		if gpuHandles.cudart != nil {
 			C.cudart_release(*gpuHandles.cudart)
 		}
+		if gpuHandles.nvcuda != nil {
+			C.nvcuda_release(*gpuHandles.nvcuda)
+		}
 	}()
 
 	// All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX
@@ -139,7 +178,11 @@ func GetGPUInfo() GpuInfoList {
 		gpuInfo := GpuInfo{
 			Library: "cuda",
 		}
-		C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo)
+		if gpuHandles.cudart != nil {
+			C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo)
+		} else {
+			C.nvcuda_check_vram(*gpuHandles.nvcuda, C.int(i), &memInfo)
+		}
 		if memInfo.err != nil {
 			slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
 			C.free(unsafe.Pointer(memInfo.err))
@@ -197,9 +240,10 @@ func GetCPUMem() (memInfo, error) {
 	return ret, nil
 }
 
-func FindGPULibs(baseLibName string, patterns []string) []string {
+func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
 	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
 	var ldPaths []string
+	var patterns []string
 	gpuLibPaths := []string{}
 	slog.Debug("Searching for GPU library", "name", baseLibName)
 
@@ -219,6 +263,7 @@ func FindGPULibs(baseLibName string, patterns []string) []string {
 		}
 		patterns = append(patterns, filepath.Join(d, baseLibName+"*"))
 	}
+	patterns = append(patterns, defaultPatterns...)
 	slog.Debug("gpu library search", "globs", patterns)
 	for _, pattern := range patterns {
 		// Ignore glob discovery errors
@@ -268,6 +313,23 @@ func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
 	return 0, nil, ""
 }
 
+func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
+	var resp C.nvcuda_init_resp_t
+	resp.ch.verbose = getVerboseState()
+	for _, libPath := range nvcudaLibPaths {
+		lib := C.CString(libPath)
+		defer C.free(unsafe.Pointer(lib))
+		C.nvcuda_init(lib, &resp)
+		if resp.err != nil {
+			slog.Debug("Unable to load nvcuda", "library", libPath, "error", C.GoString(resp.err))
+			C.free(unsafe.Pointer(resp.err))
+		} else {
+			return int(resp.num_devices), &resp.ch, libPath
+		}
+	}
+	return 0, nil, ""
+}
+
 func getVerboseState() C.uint16_t {
 	if envconfig.Debug {
 		return C.uint16_t(1)

+ 1 - 0
gpu/gpu_info.h

@@ -58,6 +58,7 @@ void cpu_check_ram(mem_info_t *resp);
 #endif
 
 #include "gpu_info_cudart.h"
+#include "gpu_info_nvcuda.h"
 
 #endif  // __GPU_INFO_H__
 #endif  // __APPLE__

+ 3 - 3
gpu/gpu_info_cudart.h

@@ -6,9 +6,9 @@
// Just enough typedef's to dlopen/dlsym for memory information
// The CUDART_ prefix (rather than CUDA_) keeps these distinct from the
// driver API's CUresult values declared in gpu_info_nvcuda.h, since both
// headers are pulled in together via gpu_info.h.
typedef enum cudartReturn_enum {
  CUDART_SUCCESS = 0,
  CUDART_ERROR_INVALID_VALUE = 1,
  CUDART_ERROR_MEMORY_ALLOCATION = 2,
  CUDART_ERROR_INSUFFICIENT_DRIVER = 35,
  // Other values omitted for now...
} cudartReturn_t;
 

+ 203 - 0
gpu/gpu_info_nvcuda.c

@@ -0,0 +1,203 @@
+#ifndef __APPLE__  // TODO - maybe consider nvidia support on intel macs?
+
+#include <string.h>
+#include "gpu_info_nvcuda.h"
+
+void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
+  CUresult ret;
+  resp->err = NULL;
+  resp->num_devices = 0;
+  const int buflen = 256;
+  char buf[buflen + 1];
+  int i;
+
+  struct lookup {
+    char *s;
+    void **p;
+  } l[] = {
+   
+      {"cuInit", (void *)&resp->ch.cuInit},
+      {"cuDriverGetVersion", (void *)&resp->ch.cuDriverGetVersion},
+      {"cuDeviceGetCount", (void *)&resp->ch.cuDeviceGetCount},
+      {"cuDeviceGet", (void *)&resp->ch.cuDeviceGet},
+      {"cuDeviceGetAttribute", (void *)&resp->ch.cuDeviceGetAttribute},
+      {"cuDeviceGetUuid", (void *)&resp->ch.cuDeviceGetUuid},
+      {"cuCtxCreate_v3", (void *)&resp->ch.cuCtxCreate_v3},
+      {"cuMemGetInfo_v2", (void *)&resp->ch.cuMemGetInfo_v2},
+      {"cuCtxDestroy", (void *)&resp->ch.cuCtxDestroy},
+      {NULL, NULL},
+  };
+
+  resp->ch.handle = LOAD_LIBRARY(nvcuda_lib_path, RTLD_LAZY);
+  if (!resp->ch.handle) {
+    char *msg = LOAD_ERR();
+    LOG(resp->ch.verbose, "library %s load err: %s\n", nvcuda_lib_path, msg);
+    snprintf(buf, buflen,
+            "Unable to load %s library to query for Nvidia GPUs: %s",
+            nvcuda_lib_path, msg);
+    free(msg);
+    resp->err = strdup(buf);
+    return;
+  }
+
+  for (i = 0; l[i].s != NULL; i++) {
+    *l[i].p = LOAD_SYMBOL(resp->ch.handle, l[i].s);
+    if (!*l[i].p) {
+      char *msg = LOAD_ERR();
+      LOG(resp->ch.verbose, "dlerr: %s\n", msg);
+      UNLOAD_LIBRARY(resp->ch.handle);
+      resp->ch.handle = NULL;
+      snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s,
+              msg);
+      free(msg);
+      resp->err = strdup(buf);
+      return;
+    }
+  }
+
+  ret = (*resp->ch.cuInit)(0);
+  if (ret != CUDA_SUCCESS) {
+    LOG(resp->ch.verbose, "cuInit err: %d\n", ret);
+    UNLOAD_LIBRARY(resp->ch.handle);
+    resp->ch.handle = NULL;
+    if (ret == CUDA_ERROR_INSUFFICIENT_DRIVER) {
+      resp->err = strdup("your nvidia driver is too old or missing.  If you have a CUDA GPU please upgrade to run ollama");
+      return;
+    }
+    snprintf(buf, buflen, "nvcuda init failure: %d", ret);
+    resp->err = strdup(buf);
+    return;
+  }
+
+  int version = 0;
+  nvcudaDriverVersion_t driverVersion;
+  driverVersion.major = 0;
+  driverVersion.minor = 0;
+
+  // Report driver version if we're in verbose mode, ignore errors
+  ret = (*resp->ch.cuDriverGetVersion)(&version);
+  if (ret != CUDA_SUCCESS) {
+    LOG(resp->ch.verbose, "cuDriverGetVersion failed: %d\n", ret);
+  } else {
+    driverVersion.major = version / 1000;
+    driverVersion.minor = (version - (driverVersion.major * 1000)) / 10;
+    LOG(resp->ch.verbose, "CUDA driver version: %d-%d\n", driverVersion.major, driverVersion.minor);
+  }
+
+  ret = (*resp->ch.cuDeviceGetCount)(&resp->num_devices);
+  if (ret != CUDA_SUCCESS) {
+    LOG(resp->ch.verbose, "cuDeviceGetCount err: %d\n", ret);
+    UNLOAD_LIBRARY(resp->ch.handle);
+    resp->ch.handle = NULL;
+    snprintf(buf, buflen, "unable to get device count: %d", ret);
+    resp->err = strdup(buf);
+    return;
+  }
+}
+
+const int buflen = 256;
+void nvcuda_check_vram(nvcuda_handle_t h, int i, mem_info_t *resp) {
+  resp->err = NULL;
+  nvcudaMemory_t memInfo = {0,0};
+  CUresult ret;
+  CUdevice device = -1;
+  CUcontext ctx = NULL;
+  char buf[buflen + 1];
+  CUuuid uuid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+
+  if (h.handle == NULL) {
+    resp->err = strdup("nvcuda handle isn't initialized");
+    return;
+  }
+
+  ret = (*h.cuDeviceGet)(&device, i);
+  if (ret != CUDA_SUCCESS) {
+    snprintf(buf, buflen, "nvcuda device failed to initialize");
+    resp->err = strdup(buf);
+    return;
+  }
+
+  resp->major = 0;
+  resp->minor = 0;
+  int major = 0;
+  int minor = 0;
+  ret = (*h.cuDeviceGetAttribute)(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device);
+  if (ret != CUDA_SUCCESS) {
+    LOG(h.verbose, "[%d] device major lookup failure: %d\n", i, ret);
+  } else {
+    ret = (*h.cuDeviceGetAttribute)(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device);
+    if (ret != CUDA_SUCCESS) {
+      LOG(h.verbose, "[%d] device minor lookup failure: %d\n", i, ret);
+    } else {
+      resp->minor = minor;  
+      resp->major = major;  
+    }
+  }
+
+  ret = (*h.cuDeviceGetUuid)(&uuid, device);
+  if (ret != CUDA_SUCCESS) {
+    LOG(h.verbose, "[%d] device uuid lookup failure: %d\n", i, ret);
+    snprintf(&resp->gpu_id[0], GPU_ID_LEN, "%d", i);
+  } else {
+    // GPU-d110a105-ac29-1d54-7b49-9c90440f215b
+    snprintf(&resp->gpu_id[0], GPU_ID_LEN,
+        "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+        uuid.bytes[0],
+        uuid.bytes[1],
+        uuid.bytes[2],
+        uuid.bytes[3],
+        uuid.bytes[4],
+        uuid.bytes[5],
+        uuid.bytes[6],
+        uuid.bytes[7],
+        uuid.bytes[8],
+        uuid.bytes[9],
+        uuid.bytes[10],
+        uuid.bytes[11],
+        uuid.bytes[12],
+        uuid.bytes[13],
+        uuid.bytes[14],
+        uuid.bytes[15]
+      );
+  }
+
+  // To get memory we have to set (and release) a context
+  ret = (*h.cuCtxCreate_v3)(&ctx, NULL, 0, 0, device);
+  if (ret != CUDA_SUCCESS) {
+    snprintf(buf, buflen, "nvcuda failed to get primary device context %d", ret);
+    resp->err = strdup(buf);
+    return;
+  }
+
+  ret = (*h.cuMemGetInfo_v2)(&memInfo.free, &memInfo.total);
+  if (ret != CUDA_SUCCESS) {
+    snprintf(buf, buflen, "nvcuda device memory info lookup failure %d", ret);
+    resp->err = strdup(buf);
+    // Best effort on failure...
+    (*h.cuCtxDestroy)(ctx);
+    return;
+  }
+
+  resp->total = memInfo.total;
+  resp->free = memInfo.free;
+
+  LOG(h.verbose, "[%s] CUDA totalMem %lu mb\n", resp->gpu_id, resp->total / 1024 / 1024);
+  LOG(h.verbose, "[%s] CUDA freeMem %lu mb\n", resp->gpu_id, resp->free / 1024 / 1024);
+  LOG(h.verbose, "[%s] Compute Capability %d.%d\n", resp->gpu_id, resp->major, resp->minor);
+
+  
+
+  ret = (*h.cuCtxDestroy)(ctx);
+  if (ret != CUDA_SUCCESS) {
+    LOG(1, "nvcuda failed to release primary device context %d", ret);
+  }
+}
+
+void nvcuda_release(nvcuda_handle_t h) {
+  LOG(h.verbose, "releasing nvcuda library\n");
+  UNLOAD_LIBRARY(h.handle);
+  // TODO and other context release logic?
+  h.handle = NULL;
+}
+
+#endif  // __APPLE__

+ 71 - 0
gpu/gpu_info_nvcuda.h

@@ -0,0 +1,71 @@
#ifndef __APPLE__
#ifndef __GPU_INFO_NVCUDA_H__
#define __GPU_INFO_NVCUDA_H__
#include "gpu_info.h"

// Just enough typedef's to dlopen/dlsym for memory information
// These mirror the CUDA Driver API (cuda.h); only the values and functions
// actually used by gpu_info_nvcuda.c are declared.
typedef enum cudaError_enum {
  CUDA_SUCCESS = 0,
  CUDA_ERROR_INVALID_VALUE = 1,
  CUDA_ERROR_MEMORY_ALLOCATION = 2,
  CUDA_ERROR_NOT_INITIALIZED = 3,
  CUDA_ERROR_INSUFFICIENT_DRIVER = 35,
  // Other values omitted for now...
} CUresult;

typedef enum CUdevice_attribute_enum {
  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,

  // TODO - not yet wired up but may be useful for Jetson or other
  // integrated GPU scenarios with shared memory
  CU_DEVICE_ATTRIBUTE_INTEGRATED = 18

} CUdevice_attribute;

typedef void *nvcudaDevice_t;  // Opaque is sufficient

// Free/total device memory in bytes, filled by cuMemGetInfo_v2.
typedef struct nvcudaMemory_st {
  uint64_t total;
  uint64_t free;
} nvcudaMemory_t;

// Decoded driver version (driver reports 1000*major + 10*minor).
typedef struct nvcudaDriverVersion {
  int major;
  int minor;
} nvcudaDriverVersion_t;

typedef struct CUuuid_st {
    unsigned char bytes[16];
} CUuuid;

typedef int CUdevice;
typedef void* CUcontext;

// Function pointers resolved via dlsym from the driver library; handle is
// the dlopen/LoadLibrary result, verbose gates LOG output.
typedef struct nvcuda_handle {
  void *handle;
  uint16_t verbose;
  CUresult (*cuInit)(unsigned int Flags);
  CUresult (*cuDriverGetVersion)(int *driverVersion);
  CUresult (*cuDeviceGetCount)(int *);
  CUresult (*cuDeviceGet)(CUdevice* device, int ordinal);
  CUresult (*cuDeviceGetAttribute)(int* pi, CUdevice_attribute attrib, CUdevice dev);
  CUresult (*cuDeviceGetUuid)(CUuuid* uuid, CUdevice dev); // signature compatible with cuDeviceGetUuid_v2

  // Context specific aspects
  // NOTE(review): the real cuMemGetInfo takes size_t*; uint64_t* matches only
  // on 64-bit targets — confirm 32-bit builds are out of scope.
  CUresult (*cuCtxCreate_v3)(CUcontext* pctx, void *params, int len, unsigned int flags, CUdevice dev);
  CUresult (*cuMemGetInfo_v2)(uint64_t* free, uint64_t* total);
  CUresult (*cuCtxDestroy)(CUcontext ctx);
} nvcuda_handle_t;

typedef struct nvcuda_init_resp {
  char *err;  // If err is non-null handle is invalid
  nvcuda_handle_t ch;
  int num_devices;
} nvcuda_init_resp_t;

void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp);
void nvcuda_check_vram(nvcuda_handle_t ch, int device_id, mem_info_t *resp);
void nvcuda_release(nvcuda_handle_t ch);

#endif  // __GPU_INFO_NVCUDA_H__
#endif  // __APPLE__