
Add CUDA Driver API for GPU discovery

We're seeing some corner cases with cudart that might be resolved by
switching to the driver API, which comes bundled with the driver package.
Daniel Hiltgen, 1 year ago
commit 089daaeabc
5 changed files with 342 additions and 5 deletions
  1. gpu/gpu.go (+64 -2)
  2. gpu/gpu_info.h (+1 -0)
  3. gpu/gpu_info_cudart.h (+3 -3)
  4. gpu/gpu_info_nvcuda.c (+203 -0)
  5. gpu/gpu_info_nvcuda.h (+71 -0)

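The driver-API approach the message refers to boils down to dlopen-ing libcuda (which ships with the driver) and resolving a handful of cu* entry points, instead of carrying cudart as a payload. A minimal standalone C sketch of that probe follows; this is not the committed code, and the library name and error handling are assumptions.

    #include <stdio.h>
    #include <dlfcn.h>

    // Minimal sketch: probe the CUDA Driver API for a device count.
    // Assumes libcuda.so.1 is on the dynamic loader path.
    // Build with: cc probe.c (add -ldl on older glibc).
    int main(void) {
      void *h = dlopen("libcuda.so.1", RTLD_LAZY);
      if (!h) { fprintf(stderr, "no libcuda: %s\n", dlerror()); return 1; }

      int (*cuInit)(unsigned int) = (int (*)(unsigned int))dlsym(h, "cuInit");
      int (*cuDeviceGetCount)(int *) = (int (*)(int *))dlsym(h, "cuDeviceGetCount");
      if (!cuInit || !cuDeviceGetCount) { fprintf(stderr, "missing symbols\n"); return 1; }

      int count = 0;
      if (cuInit(0) == 0 && cuDeviceGetCount(&count) == 0) {  // 0 == CUDA_SUCCESS
        printf("CUDA devices: %d\n", count);
      }
      dlclose(h);
      return 0;
    }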
+ 64 - 2
gpu/gpu.go

@@ -26,6 +26,7 @@ import (
 type handles struct {
 	deviceCount int
 	cudart      *C.cudart_handle_t
+	nvcuda      *C.nvcuda_handle_t
 }
 
 const (
@@ -62,6 +63,22 @@ var CudartWindowsGlobs = []string{
 	"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
 	"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
 }
 }
 
 
+var NvcudaLinuxGlobs = []string{
+	"/usr/local/cuda*/targets/*/lib/libcuda.so*",
+	"/usr/lib/*-linux-gnu/nvidia/current/libcuda.so*",
+	"/usr/lib/*-linux-gnu/libcuda.so*",
+	"/usr/lib/wsl/lib/libcuda.so*",
+	"/usr/lib/wsl/drivers/*/libcuda.so*",
+	"/opt/cuda/lib*/libcuda.so*",
+	"/usr/local/cuda/lib*/libcuda.so*",
+	"/usr/lib*/libcuda.so*",
+	"/usr/local/lib*/libcuda.so*",
+}
+
+var NvcudaWindowsGlobs = []string{
+	"c:\\windows\\system*\\nvcuda.dll",
+}
+
 // Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
 // Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
 var CudaTegra string = os.Getenv("JETSON_JETPACK")
@@ -74,6 +91,8 @@ func initGPUHandles() *handles {
 	gpuHandles := &handles{}
 	var cudartMgmtName string
 	var cudartMgmtPatterns []string
+	var nvcudaMgmtName string
+	var nvcudaMgmtPatterns []string
 
 	tmpDir, _ := PayloadsDir()
 	switch runtime.GOOS {
@@ -82,6 +101,9 @@ func initGPUHandles() *handles {
 		localAppData := os.Getenv("LOCALAPPDATA")
 		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", cudartMgmtName)}
 		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartWindowsGlobs...)
+		// Aligned with driver, we can't carry as payloads
+		nvcudaMgmtName = "nvcuda.dll"
+		nvcudaMgmtPatterns = NvcudaWindowsGlobs
 	case "linux":
 	case "linux":
 		cudartMgmtName = "libcudart.so*"
 		cudartMgmtName = "libcudart.so*"
 		if tmpDir != "" {
 		if tmpDir != "" {
@@ -89,11 +111,25 @@ func initGPUHandles() *handles {
 			cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", cudartMgmtName)}
 		}
 		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
+		// Aligned with driver, we can't carry as payloads
+		nvcudaMgmtName = "libcuda.so*"
+		nvcudaMgmtPatterns = NvcudaLinuxGlobs
 	default:
 		return gpuHandles
 	}
 
 	slog.Info("Detecting GPUs")
+	nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
+	if len(nvcudaLibPaths) > 0 {
+		deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
+		if nvcuda != nil {
+			slog.Info("detected GPUs", "count", deviceCount, "library", libPath)
+			gpuHandles.nvcuda = nvcuda
+			gpuHandles.deviceCount = deviceCount
+			return gpuHandles
+		}
+	}
+
 	cudartLibPaths := FindGPULibs(cudartMgmtName, cudartMgmtPatterns)
 	if len(cudartLibPaths) > 0 {
 		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
@@ -118,6 +154,9 @@ func GetGPUInfo() GpuInfoList {
 		if gpuHandles.cudart != nil {
 			C.cudart_release(*gpuHandles.cudart)
 		}
+		if gpuHandles.nvcuda != nil {
+			C.nvcuda_release(*gpuHandles.nvcuda)
+		}
 	}()
 
 	// All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX
@@ -138,7 +177,11 @@ func GetGPUInfo() GpuInfoList {
 		gpuInfo := GpuInfo{
 			Library: "cuda",
 		}
-		C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo)
+		if gpuHandles.cudart != nil {
+			C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo)
+		} else {
+			C.nvcuda_check_vram(*gpuHandles.nvcuda, C.int(i), &memInfo)
+		}
 		if memInfo.err != nil {
 			slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
 			C.free(unsafe.Pointer(memInfo.err))
@@ -196,9 +239,10 @@ func GetCPUMem() (memInfo, error) {
 	return ret, nil
 }
 
-func FindGPULibs(baseLibName string, patterns []string) []string {
+func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
 	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
 	var ldPaths []string
+	var patterns []string
 	gpuLibPaths := []string{}
 	slog.Debug("Searching for GPU library", "name", baseLibName)
 
@@ -218,6 +262,7 @@ func FindGPULibs(baseLibName string, patterns []string) []string {
 		}
 		patterns = append(patterns, filepath.Join(d, baseLibName+"*"))
 	}
+	patterns = append(patterns, defaultPatterns...)
 	slog.Debug("gpu library search", "globs", patterns)
 	slog.Debug("gpu library search", "globs", patterns)
 	for _, pattern := range patterns {
 	for _, pattern := range patterns {
 		// Ignore glob discovery errors
 		// Ignore glob discovery errors
@@ -267,6 +312,23 @@ func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
 	return 0, nil, ""
 }
 
+func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
+	var resp C.nvcuda_init_resp_t
+	resp.ch.verbose = getVerboseState()
+	for _, libPath := range nvcudaLibPaths {
+		lib := C.CString(libPath)
+		defer C.free(unsafe.Pointer(lib))
+		C.nvcuda_init(lib, &resp)
+		if resp.err != nil {
+			slog.Debug("Unable to load nvcuda", "library", libPath, "error", C.GoString(resp.err))
+			C.free(unsafe.Pointer(resp.err))
+		} else {
+			return int(resp.num_devices), &resp.ch, libPath
+		}
+	}
+	return 0, nil, ""
+}
+
 func getVerboseState() C.uint16_t {
 	if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" {
 		return C.uint16_t(1)

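The FindGPULibs change above only adjusts search order: patterns derived from the loader path are tried first, then the caller's default globs. Each pattern is expanded with ordinary glob matching; a rough standalone C illustration of expanding one of the NvcudaLinuxGlobs entries (illustrative only; the hard-coded pattern is just one item from that list):

    #include <stdio.h>
    #include <glob.h>

    // Illustrative only: expand one default search glob for libcuda candidates.
    int main(void) {
      glob_t g;
      const char *pattern = "/usr/lib/*-linux-gnu/libcuda.so*";  // one entry from NvcudaLinuxGlobs
      if (glob(pattern, 0, NULL, &g) == 0) {
        for (size_t i = 0; i < g.gl_pathc; i++) {
          printf("candidate: %s\n", g.gl_pathv[i]);
        }
        globfree(&g);
      }
      return 0;
    }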
+ 1 - 0
gpu/gpu_info.h

@@ -58,6 +58,7 @@ void cpu_check_ram(mem_info_t *resp);
 #endif
 
 #include "gpu_info_cudart.h"
+#include "gpu_info_nvcuda.h"
 
 #endif  // __GPU_INFO_H__
 #endif  // __APPLE__

+ 3 - 3
gpu/gpu_info_cudart.h

@@ -6,9 +6,9 @@
 // Just enough typedef's to dlopen/dlsym for memory information
 typedef enum cudartReturn_enum {
   CUDART_SUCCESS = 0,
-  CUDA_ERROR_INVALID_VALUE = 1,
-  CUDA_ERROR_MEMORY_ALLOCATION = 2,
-  CUDA_ERROR_INSUFFICIENT_DRIVER = 35,
+  CUDART_ERROR_INVALID_VALUE = 1,
+  CUDART_ERROR_MEMORY_ALLOCATION = 2,
+  CUDART_ERROR_INSUFFICIENT_DRIVER = 35,
   // Other values omitted for now...
 } cudartReturn_t;
 

+ 203 - 0
gpu/gpu_info_nvcuda.c

@@ -0,0 +1,203 @@
+#ifndef __APPLE__  // TODO - maybe consider nvidia support on intel macs?
+
+#include <string.h>
+#include "gpu_info_nvcuda.h"
+
+void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
+  CUresult ret;
+  resp->err = NULL;
+  resp->num_devices = 0;
+  const int buflen = 256;
+  char buf[buflen + 1];
+  int i;
+
+  struct lookup {
+    char *s;
+    void **p;
+  } l[] = {
+      {"cuInit", (void *)&resp->ch.cuInit},
+      {"cuDriverGetVersion", (void *)&resp->ch.cuDriverGetVersion},
+      {"cuDeviceGetCount", (void *)&resp->ch.cuDeviceGetCount},
+      {"cuDeviceGet", (void *)&resp->ch.cuDeviceGet},
+      {"cuDeviceGetAttribute", (void *)&resp->ch.cuDeviceGetAttribute},
+      {"cuDeviceGetUuid", (void *)&resp->ch.cuDeviceGetUuid},
+      {"cuCtxCreate_v3", (void *)&resp->ch.cuCtxCreate_v3},
+      {"cuMemGetInfo_v2", (void *)&resp->ch.cuMemGetInfo_v2},
+      {"cuCtxDestroy", (void *)&resp->ch.cuCtxDestroy},
+      {NULL, NULL},
+  };
+
+  resp->ch.handle = LOAD_LIBRARY(nvcuda_lib_path, RTLD_LAZY);
+  if (!resp->ch.handle) {
+    char *msg = LOAD_ERR();
+    LOG(resp->ch.verbose, "library %s load err: %s\n", nvcuda_lib_path, msg);
+    snprintf(buf, buflen,
+            "Unable to load %s library to query for Nvidia GPUs: %s",
+            nvcuda_lib_path, msg);
+    free(msg);
+    resp->err = strdup(buf);
+    return;
+  }
+
+  for (i = 0; l[i].s != NULL; i++) {
+    *l[i].p = LOAD_SYMBOL(resp->ch.handle, l[i].s);
+    if (!*l[i].p) {
+      char *msg = LOAD_ERR();
+      LOG(resp->ch.verbose, "dlerr: %s\n", msg);
+      UNLOAD_LIBRARY(resp->ch.handle);
+      resp->ch.handle = NULL;
+      snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s,
+              msg);
+      free(msg);
+      resp->err = strdup(buf);
+      return;
+    }
+  }
+
+  ret = (*resp->ch.cuInit)(0);
+  if (ret != CUDA_SUCCESS) {
+    LOG(resp->ch.verbose, "cuInit err: %d\n", ret);
+    UNLOAD_LIBRARY(resp->ch.handle);
+    resp->ch.handle = NULL;
+    if (ret == CUDA_ERROR_INSUFFICIENT_DRIVER) {
+      resp->err = strdup("your nvidia driver is too old or missing.  If you have a CUDA GPU please upgrade to run ollama");
+      return;
+    }
+    snprintf(buf, buflen, "nvcuda init failure: %d", ret);
+    resp->err = strdup(buf);
+    return;
+  }
+
+  int version = 0;
+  nvcudaDriverVersion_t driverVersion;
+  driverVersion.major = 0;
+  driverVersion.minor = 0;
+
+  // Report driver version if we're in verbose mode, ignore errors
+  ret = (*resp->ch.cuDriverGetVersion)(&version);
+  if (ret != CUDA_SUCCESS) {
+    LOG(resp->ch.verbose, "cuDriverGetVersion failed: %d\n", ret);
+  } else {
+    driverVersion.major = version / 1000;
+    driverVersion.minor = (version - (driverVersion.major * 1000)) / 10;
+    LOG(resp->ch.verbose, "CUDA driver version: %d-%d\n", driverVersion.major, driverVersion.minor);
+  }
+
+  ret = (*resp->ch.cuDeviceGetCount)(&resp->num_devices);
+  if (ret != CUDA_SUCCESS) {
+    LOG(resp->ch.verbose, "cuDeviceGetCount err: %d\n", ret);
+    UNLOAD_LIBRARY(resp->ch.handle);
+    resp->ch.handle = NULL;
+    snprintf(buf, buflen, "unable to get device count: %d", ret);
+    resp->err = strdup(buf);
+    return;
+  }
+}
+
+const int buflen = 256;
+void nvcuda_check_vram(nvcuda_handle_t h, int i, mem_info_t *resp) {
+  resp->err = NULL;
+  nvcudaMemory_t memInfo = {0,0};
+  CUresult ret;
+  CUdevice device = -1;
+  CUcontext ctx = NULL;
+  char buf[buflen + 1];
+  CUuuid uuid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+
+  if (h.handle == NULL) {
+    resp->err = strdup("nvcuda handle isn't initialized");
+    return;
+  }
+
+  ret = (*h.cuDeviceGet)(&device, i);
+  if (ret != CUDA_SUCCESS) {
+    snprintf(buf, buflen, "nvcuda device failed to initialize");
+    resp->err = strdup(buf);
+    return;
+  }
+
+  resp->major = 0;
+  resp->minor = 0;
+  int major = 0;
+  int minor = 0;
+  ret = (*h.cuDeviceGetAttribute)(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device);
+  if (ret != CUDA_SUCCESS) {
+    LOG(h.verbose, "[%d] device major lookup failure: %d\n", i, ret);
+  } else {
+    ret = (*h.cuDeviceGetAttribute)(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device);
+    if (ret != CUDA_SUCCESS) {
+      LOG(h.verbose, "[%d] device minor lookup failure: %d\n", i, ret);
+    } else {
+      resp->minor = minor;  
+      resp->major = major;  
+    }
+  }
+
+  ret = (*h.cuDeviceGetUuid)(&uuid, device);
+  if (ret != CUDA_SUCCESS) {
+    LOG(h.verbose, "[%d] device uuid lookup failure: %d\n", i, ret);
+    snprintf(&resp->gpu_id[0], GPU_ID_LEN, "%d", i);
+  } else {
+    // GPU-d110a105-ac29-1d54-7b49-9c90440f215b
+    snprintf(&resp->gpu_id[0], GPU_ID_LEN,
+        "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+        uuid.bytes[0],
+        uuid.bytes[1],
+        uuid.bytes[2],
+        uuid.bytes[3],
+        uuid.bytes[4],
+        uuid.bytes[5],
+        uuid.bytes[6],
+        uuid.bytes[7],
+        uuid.bytes[8],
+        uuid.bytes[9],
+        uuid.bytes[10],
+        uuid.bytes[11],
+        uuid.bytes[12],
+        uuid.bytes[13],
+        uuid.bytes[14],
+        uuid.bytes[15]
+      );
+  }
+
+  // To get memory we have to set (and release) a context
+  ret = (*h.cuCtxCreate_v3)(&ctx, NULL, 0, 0, device);
+  if (ret != CUDA_SUCCESS) {
+    snprintf(buf, buflen, "nvcuda failed to get primary device context %d", ret);
+    resp->err = strdup(buf);
+    return;
+  }
+
+  ret = (*h.cuMemGetInfo_v2)(&memInfo.free, &memInfo.total);
+  if (ret != CUDA_SUCCESS) {
+    snprintf(buf, buflen, "nvcuda device memory info lookup failure %d", ret);
+    resp->err = strdup(buf);
+    // Best effort on failure...
+    (*h.cuCtxDestroy)(ctx);
+    return;
+  }
+
+  resp->total = memInfo.total;
+  resp->free = memInfo.free;
+
+  LOG(h.verbose, "[%s] CUDA totalMem %lu mb\n", resp->gpu_id, resp->total / 1024 / 1024);
+  LOG(h.verbose, "[%s] CUDA freeMem %lu mb\n", resp->gpu_id, resp->free / 1024 / 1024);
+  LOG(h.verbose, "[%s] Compute Capability %d.%d\n", resp->gpu_id, resp->major, resp->minor);
+
+  ret = (*h.cuCtxDestroy)(ctx);
+  if (ret != CUDA_SUCCESS) {
+    LOG(1, "nvcuda failed to release primary device context %d", ret);
+  }
+}
+
+void nvcuda_release(nvcuda_handle_t h) {
+  LOG(h.verbose, "releasing nvcuda library\n");
+  UNLOAD_LIBRARY(h.handle);
+  // TODO and other context release logic?
+  h.handle = NULL;
+}
+
+#endif  // __APPLE__

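Taken together, the intended call sequence for the new API is nvcuda_init once per candidate library path, nvcuda_check_vram per device, and nvcuda_release when done. A hypothetical harness showing that flow, based only on the structs and signatures in this commit (the library path is an assumption, and the harness would need to be compiled and linked against gpu_info_nvcuda.c):

    #include <stdio.h>
    #include <stdlib.h>
    #include "gpu_info.h"

    // Hypothetical harness exercising the new driver-API discovery path.
    int main(void) {
      nvcuda_init_resp_t resp = {0};
      resp.ch.verbose = 1;
      char libpath[] = "/usr/lib/x86_64-linux-gnu/libcuda.so.1";  // assumed path, not from the commit
      nvcuda_init(libpath, &resp);
      if (resp.err != NULL) {
        fprintf(stderr, "init failed: %s\n", resp.err);
        free(resp.err);
        return 1;
      }
      for (int i = 0; i < resp.num_devices; i++) {
        mem_info_t mem = {0};
        nvcuda_check_vram(resp.ch, i, &mem);
        if (mem.err != NULL) {
          fprintf(stderr, "[%d] %s\n", i, mem.err);
          free(mem.err);
          continue;
        }
        printf("[%s] %llu/%llu bytes free, compute %d.%d\n",
               mem.gpu_id, (unsigned long long)mem.free,
               (unsigned long long)mem.total, mem.major, mem.minor);
      }
      nvcuda_release(resp.ch);
      return 0;
    }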
+ 71 - 0
gpu/gpu_info_nvcuda.h

@@ -0,0 +1,71 @@
+#ifndef __APPLE__
+#ifndef __GPU_INFO_NVCUDA_H__
+#define __GPU_INFO_NVCUDA_H__
+#include "gpu_info.h"
+
+// Just enough typedef's to dlopen/dlsym for memory information
+typedef enum cudaError_enum {
+  CUDA_SUCCESS = 0,
+  CUDA_ERROR_INVALID_VALUE = 1,
+  CUDA_ERROR_MEMORY_ALLOCATION = 2,
+  CUDA_ERROR_NOT_INITIALIZED = 3,
+  CUDA_ERROR_INSUFFICIENT_DRIVER = 35,
+  // Other values omitted for now...
+} CUresult;
+
+typedef enum CUdevice_attribute_enum {
+  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
+  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
+
+  // TODO - not yet wired up but may be useful for Jetson or other
+  // integrated GPU scenarios with shared memory
+  CU_DEVICE_ATTRIBUTE_INTEGRATED = 18
+
+} CUdevice_attribute;
+
+typedef void *nvcudaDevice_t;  // Opaque is sufficient
+typedef struct nvcudaMemory_st {
+  uint64_t total;
+  uint64_t free;
+} nvcudaMemory_t;
+
+typedef struct nvcudaDriverVersion {
+  int major;
+  int minor;
+} nvcudaDriverVersion_t;
+
+typedef struct CUuuid_st {
+    unsigned char bytes[16];
+} CUuuid;
+
+typedef int CUdevice;
+typedef void* CUcontext;
+
+typedef struct nvcuda_handle {
+  void *handle;
+  uint16_t verbose;
+  CUresult (*cuInit)(unsigned int Flags);
+  CUresult (*cuDriverGetVersion)(int *driverVersion);
+  CUresult (*cuDeviceGetCount)(int *);
+  CUresult (*cuDeviceGet)(CUdevice* device, int ordinal);
+  CUresult (*cuDeviceGetAttribute)(int* pi, CUdevice_attribute attrib, CUdevice dev);
+  CUresult (*cuDeviceGetUuid)(CUuuid* uuid, CUdevice dev); // signature compatible with cuDeviceGetUuid_v2
+
+  // Context specific aspects
+  CUresult (*cuCtxCreate_v3)(CUcontext* pctx, void *params, int len, unsigned int flags, CUdevice dev);
+  CUresult (*cuMemGetInfo_v2)(uint64_t* free, uint64_t* total);
+  CUresult (*cuCtxDestroy)(CUcontext ctx);
+} nvcuda_handle_t;
+
+typedef struct nvcuda_init_resp {
+  char *err;  // If err is non-null handle is invalid
+  nvcuda_handle_t ch;
+  int num_devices;
+} nvcuda_init_resp_t;
+
+void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp);
+void nvcuda_check_vram(nvcuda_handle_t ch, int device_id, mem_info_t *resp);
+void nvcuda_release(nvcuda_handle_t ch);
+
+#endif  // __GPU_INFO_NVCUDA_H__
+#endif  // __APPLE__
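
For reference, cuDriverGetVersion packs the driver's supported CUDA version as major*1000 + minor*10, which is what the decoding in nvcuda_init above relies on. A small worked example (the version value is hypothetical):

    #include <stdio.h>

    // Worked example of the version decoding used in nvcuda_init:
    // cuDriverGetVersion reports e.g. 12020 for CUDA 12.2.
    int main(void) {
      int version = 12020;                        // hypothetical value
      int major = version / 1000;                 // 12
      int minor = (version - major * 1000) / 10;  // 2
      printf("CUDA driver %d.%d\n", major, minor);
      return 0;
    }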