123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147 |
- #ifndef __APPLE__
- #ifndef __GPU_INFO_CUDART_H__
- #define __GPU_INFO_CUDART_H__
- #include "gpu_info.h"
- // Just enough typedefs to dlopen/dlsym for memory information
- typedef enum cudartReturn_enum { /* Status code type returned by every function pointer in cudart_handle_t.
-  * Only a handful of codes are named here. NOTE(review): the numeric values
-  * look like the CUDA runtime's cudaError_t codes (success, invalid value,
-  * memory allocation, insufficient driver) - confirm against the toolkit
-  * headers before adding more. */
- CUDART_SUCCESS = 0,
- CUDART_ERROR_INVALID_VALUE = 1,
- CUDART_ERROR_MEMORY_ALLOCATION = 2,
- CUDART_ERROR_INSUFFICIENT_DRIVER = 35, // values are not contiguous: 3..34 are left out on purpose (see below)
- // Other values omitted for now...
- } cudartReturn_t;
/*
 * Attribute selectors for the cudaDeviceGetAttribute entry point in
 * cudart_handle_t, which takes a cudartDeviceAttr_t as its second argument.
 *
 * The numeric values are the CUDA runtime's own cudaDeviceAttr codes (see
 * the CUDA Runtime API reference). They are presumably handed to the loaded
 * library unchanged, so they must not be renumbered.
 */
typedef enum cudartDeviceAttr_enum {
  cudartDevAttrComputeCapabilityMajor = 75,
  cudartDevAttrComputeCapabilityMinor = 76,

  // TODO - not yet wired up but may be useful for Jetson or other
  // integrated GPU scenarios with shared memory
  cudartDevAttrIntegrated = 18,

  // Original spelling of the enumerator above. It did not follow the
  // cudartDevAttr* prefix used by the rest of this enum; it is kept as an
  // alias with the same value so existing references keep compiling.
  cudaDevAttrIntegrated = cudartDevAttrIntegrated
} cudartDeviceAttr_t;
- typedef void *cudartDevice_t; // Opaque is sufficient: no other declaration in this header references this type or looks inside it
- typedef struct cudartMemory_st { /* Total / free / used memory counters.
-  * NOTE(review): units are presumably bytes and used is presumably
-  * total minus free - confirm against the implementation. No prototype in
-  * this header takes this type (cudart_check_vram reports through
-  * mem_info_t instead). */
- size_t total;
- size_t free;
- size_t used;
- } cudartMemory_t;
- typedef struct cudartDriverVersion { /* Driver version held as separate major and minor numbers.
-  * NOTE(review): the cudaDriverGetVersion pointer in cudart_handle_t yields
-  * a single int; presumably the implementation splits that value into this
-  * pair - confirm there. */
- int major;
- int minor;
- } cudartDriverVersion_t;
- typedef struct cudaUUID { /* 16 raw bytes; this is the type of the uuid member of cudaDeviceProp_t. */
- unsigned char bytes[16];
- } cudaUUID_t;
- typedef struct cudaDeviceProp { /* Device-properties record passed by pointer to the
-  * cudaGetDeviceProperties entry point declared in cudart_handle_t.
-  * NOTE(review): presumably the loaded CUDA runtime writes straight into
-  * this memory, in which case member order, member types and total size
-  * must match the runtime's own definition exactly - do not reorder,
-  * remove or retype members, and confirm the layout against the toolkit
-  * version that is actually loaded. The per-member descriptions below
-  * appear to be taken from the CUDA toolkit header. */
- char name[256]; /**< ASCII string identifying device */
- cudaUUID_t uuid; /**< 16-byte unique identifier */
- char luid[8]; /**< 8-byte locally unique identifier. Value is undefined on TCC and non-Windows platforms */
- unsigned int luidDeviceNodeMask; /**< LUID device node mask. Value is undefined on TCC and non-Windows platforms */
- size_t totalGlobalMem; /**< Global memory available on device in bytes */
- size_t sharedMemPerBlock; /**< Shared memory available per block in bytes */
- int regsPerBlock; /**< 32-bit registers available per block */
- int warpSize; /**< Warp size in threads */
- size_t memPitch; /**< Maximum pitch in bytes allowed by memory copies */
- int maxThreadsPerBlock; /**< Maximum number of threads per block */
- int maxThreadsDim[3]; /**< Maximum size of each dimension of a block */
- int maxGridSize[3]; /**< Maximum size of each dimension of a grid */
- int clockRate; /**< Clock frequency in kilohertz */
- size_t totalConstMem; /**< Constant memory available on device in bytes */
- int major; /**< Major compute capability */
- int minor; /**< Minor compute capability */
- size_t textureAlignment; /**< Alignment requirement for textures */
- size_t texturePitchAlignment; /**< Pitch alignment requirement for texture references bound to pitched memory */
- int deviceOverlap; /**< Device can concurrently copy memory and execute a kernel. Deprecated. Use instead asyncEngineCount. */
- int multiProcessorCount; /**< Number of multiprocessors on device */
- int kernelExecTimeoutEnabled; /**< Specified whether there is a run time limit on kernels */
- int integrated; /**< Device is integrated as opposed to discrete */
- int canMapHostMemory; /**< Device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer */
- int computeMode; /**< Compute mode (See ::cudaComputeMode) */
- int maxTexture1D; /**< Maximum 1D texture size */
- int maxTexture1DMipmap; /**< Maximum 1D mipmapped texture size */
- int maxTexture1DLinear; /**< Deprecated, do not use. Use cudaDeviceGetTexture1DLinearMaxWidth() or cuDeviceGetTexture1DLinearMaxWidth() instead. */
- int maxTexture2D[2]; /**< Maximum 2D texture dimensions */
- int maxTexture2DMipmap[2]; /**< Maximum 2D mipmapped texture dimensions */
- int maxTexture2DLinear[3]; /**< Maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory */
- int maxTexture2DGather[2]; /**< Maximum 2D texture dimensions if texture gather operations have to be performed */
- int maxTexture3D[3]; /**< Maximum 3D texture dimensions */
- int maxTexture3DAlt[3]; /**< Maximum alternate 3D texture dimensions */
- int maxTextureCubemap; /**< Maximum Cubemap texture dimensions */
- int maxTexture1DLayered[2]; /**< Maximum 1D layered texture dimensions */
- int maxTexture2DLayered[3]; /**< Maximum 2D layered texture dimensions */
- int maxTextureCubemapLayered[2];/**< Maximum Cubemap layered texture dimensions */
- int maxSurface1D; /**< Maximum 1D surface size */
- int maxSurface2D[2]; /**< Maximum 2D surface dimensions */
- int maxSurface3D[3]; /**< Maximum 3D surface dimensions */
- int maxSurface1DLayered[2]; /**< Maximum 1D layered surface dimensions */
- int maxSurface2DLayered[3]; /**< Maximum 2D layered surface dimensions */
- int maxSurfaceCubemap; /**< Maximum Cubemap surface dimensions */
- int maxSurfaceCubemapLayered[2];/**< Maximum Cubemap layered surface dimensions */
- size_t surfaceAlignment; /**< Alignment requirements for surfaces */
- int concurrentKernels; /**< Device can possibly execute multiple kernels concurrently */
- int ECCEnabled; /**< Device has ECC support enabled */
- int pciBusID; /**< PCI bus ID of the device */
- int pciDeviceID; /**< PCI device ID of the device */
- int pciDomainID; /**< PCI domain ID of the device */
- int tccDriver; /**< 1 if device is a Tesla device using TCC driver, 0 otherwise */
- int asyncEngineCount; /**< Number of asynchronous engines */
- int unifiedAddressing; /**< Device shares a unified address space with the host */
- int memoryClockRate; /**< Peak memory clock frequency in kilohertz */
- int memoryBusWidth; /**< Global memory bus width in bits */
- int l2CacheSize; /**< Size of L2 cache in bytes */
- int persistingL2CacheMaxSize; /**< Device's maximum l2 persisting lines capacity setting in bytes */
- int maxThreadsPerMultiProcessor;/**< Maximum resident threads per multiprocessor */
- int streamPrioritiesSupported; /**< Device supports stream priorities */
- int globalL1CacheSupported; /**< Device supports caching globals in L1 */
- int localL1CacheSupported; /**< Device supports caching locals in L1 */
- size_t sharedMemPerMultiprocessor; /**< Shared memory available per multiprocessor in bytes */
- int regsPerMultiprocessor; /**< 32-bit registers available per multiprocessor */
- int managedMemory; /**< Device supports allocating managed memory on this system */
- int isMultiGpuBoard; /**< Device is on a multi-GPU board */
- int multiGpuBoardGroupID; /**< Unique identifier for a group of devices on the same multi-GPU board */
- int hostNativeAtomicSupported; /**< Link between the device and the host supports native atomic operations */
- int singleToDoublePrecisionPerfRatio; /**< Ratio of single precision performance (in floating-point operations per second) to double precision performance */
- int pageableMemoryAccess; /**< Device supports coherently accessing pageable memory without calling cudaHostRegister on it */
- int concurrentManagedAccess; /**< Device can coherently access managed memory concurrently with the CPU */
- int computePreemptionSupported; /**< Device supports Compute Preemption */
- int canUseHostPointerForRegisteredMem; /**< Device can access host registered memory at the same virtual address as the CPU */
- int cooperativeLaunch; /**< Device supports launching cooperative kernels via ::cudaLaunchCooperativeKernel */
- int cooperativeMultiDeviceLaunch; /**< Deprecated, cudaLaunchCooperativeKernelMultiDevice is deprecated. */
- size_t sharedMemPerBlockOptin; /**< Per device maximum shared memory per block usable by special opt in */
- int pageableMemoryAccessUsesHostPageTables; /**< Device accesses pageable memory via the host's page tables */
- int directManagedMemAccessFromHost; /**< Host can directly access managed memory on the device without migration. */
- int maxBlocksPerMultiProcessor; /**< Maximum number of resident blocks per multiprocessor */
- int accessPolicyMaxWindowSize; /**< The maximum value of ::cudaAccessPolicyWindow::num_bytes. */
- size_t reservedSharedMemPerBlock; /**< Shared memory reserved by CUDA driver per block in bytes */
- } cudaDeviceProp_t;
- typedef struct cudart_handle { /* Library handle plus a table of CUDA runtime entry points.
-  * NOTE(review): going by the dlopen/dlsym remark at the top of this
-  * header, handle is presumably the dlopen() result and each function
-  * pointer below is presumably resolved with dlsym() by cudart_init -
-  * confirm in the implementation. uint16_t and size_t are not declared by
-  * this header itself; the only include here is gpu_info.h, so they
-  * presumably come from there. */
- void *handle;
- uint16_t verbose; // NOTE(review): looks like a logging/verbosity flag - confirm how the implementation interprets it
- cudartReturn_t (*cudaSetDevice)(int device);
- cudartReturn_t (*cudaDeviceSynchronize)(void);
- cudartReturn_t (*cudaDeviceReset)(void);
- cudartReturn_t (*cudaMemGetInfo)(size_t *, size_t *); // two out-pointers; presumably free then total as in the CUDA runtime API - confirm
- cudartReturn_t (*cudaGetDeviceCount)(int *);
- cudartReturn_t (*cudaDeviceGetAttribute)(int* value, cudartDeviceAttr_t attr, int device);
- cudartReturn_t (*cudaDriverGetVersion) (int *driverVersion);
- cudartReturn_t (*cudaGetDeviceProperties) (cudaDeviceProp_t* prop, int device);
- } cudart_handle_t;
- typedef struct cudart_init_resp { /* Result record that cudart_init receives by pointer (its resp parameter).
-  * NOTE(review): who allocates and who frees the err string is not
-  * visible in this header - confirm before freeing or retaining it. */
- char *err; // If err is non-null handle is invalid
- cudart_handle_t ch; // entry-point table; per the err comment above it must not be used when err is non-null
- int num_devices; // NOTE(review): presumably the count reported by cudaGetDeviceCount - confirm
- } cudart_init_resp_t;
- void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp); /* Returns nothing; results are reported through resp.
-  * NOTE(review): presumably loads the library at cudart_lib_path and fills
-  * resp->ch and resp->num_devices, setting resp->err on failure - confirm
-  * against the implementation. */
- void cudart_check_vram(cudart_handle_t ch, int device_id, mem_info_t *resp); /* Takes the handle table by value and
-  * reports through resp. mem_info_t is not declared in this header;
-  * presumably it comes from gpu_info.h. */
- void cudart_release(cudart_handle_t ch); /* Takes the handle table by value.
-  * NOTE(review): presumably unloads the library behind ch.handle, which
-  * would leave any other copy of ch stale - confirm. */
- #endif // __GPU_INFO_CUDART_H__
- #endif // __APPLE__
|