// gpu_info_cudart.h (9.9 KB)
  1. #ifndef __APPLE__
  2. #ifndef __GPU_INFO_CUDART_H__
  3. #define __GPU_INFO_CUDART_H__
  4. #include "gpu_info.h"
  5. // Just enough typedef's to dlopen/dlsym for memory information
  // Minimal subset of the CUDA runtime status codes. The numeric values
  // mirror the runtime's own error enumeration, so they must not be changed.
  typedef enum cudartReturn_enum {
    CUDART_SUCCESS                   = 0,
    CUDART_ERROR_INVALID_VALUE       = 1,
    CUDART_ERROR_MEMORY_ALLOCATION   = 2,
    CUDART_ERROR_INSUFFICIENT_DRIVER = 35
    // All other runtime status codes are left out for now.
  } cudartReturn_t;
  // Attribute selectors passed as the `attr` argument of
  // cudaDeviceGetAttribute. Every enumerator uses the cudartDevAttr prefix;
  // the one historical exception is kept as an alias for source compatibility.
  typedef enum cudartDeviceAttr_enum {
    cudartDevAttrComputeCapabilityMajor = 75,
    cudartDevAttrComputeCapabilityMinor = 76,
    // TODO - not yet wired up but may be useful for Jetson or other
    // integrated GPU scenarios with shared memory
    cudartDevAttrIntegrated = 18,
    // Original spelling (missing the "rt" in the prefix). Retained so that
    // any existing reference keeps compiling; new code should use
    // cudartDevAttrIntegrated.
    cudaDevAttrIntegrated = cudartDevAttrIntegrated
  } cudartDeviceAttr_t;
  // Device handle. It is only ever treated as an opaque value, so an
  // untyped pointer is sufficient.
  typedef void *cudartDevice_t;
  // Memory accounting record: three size_t counters.
  // NOTE(review): units are presumably bytes -- confirm against the code
  // that fills this in.
  typedef struct cudartMemory_st {
    size_t total;  // overall amount
    size_t free;   // amount currently available
    size_t used;   // amount currently in use
  } cudartMemory_t;
  // Driver version split into its two numeric components.
  typedef struct cudartDriverVersion {
    int major;  // major version component
    int minor;  // minor version component
  } cudartDriverVersion_t;
  // Sixteen raw bytes that uniquely identify a device.
  typedef struct cudaUUID {
    unsigned char bytes[16];
  } cudaUUID_t;

  // Device property record. A pointer to this struct is handed to the
  // runtime's cudaGetDeviceProperties entry point, so the member order,
  // member types and array lengths below must be kept exactly as they are.
  typedef struct cudaDeviceProp {
    // ---- identity ----
    char name[256];                  // ASCII string identifying the device
    cudaUUID_t uuid;                 // 16-byte unique identifier
    char luid[8];                    // 8-byte locally unique identifier; undefined on TCC and non-Windows platforms
    unsigned int luidDeviceNodeMask; // LUID device node mask; undefined on TCC and non-Windows platforms

    // ---- core memory and execution limits ----
    size_t totalGlobalMem;           // global memory available on the device, in bytes
    size_t sharedMemPerBlock;        // shared memory available per block, in bytes
    int regsPerBlock;                // 32-bit registers available per block
    int warpSize;                    // warp size, in threads
    size_t memPitch;                 // maximum pitch, in bytes, allowed by memory copies
    int maxThreadsPerBlock;          // maximum number of threads per block
    int maxThreadsDim[3];            // maximum size of each dimension of a block
    int maxGridSize[3];              // maximum size of each dimension of a grid
    int clockRate;                   // clock frequency, in kilohertz
    size_t totalConstMem;            // constant memory available on the device, in bytes
    int major;                       // major compute capability
    int minor;                       // minor compute capability
    size_t textureAlignment;         // alignment requirement for textures
    size_t texturePitchAlignment;    // pitch alignment requirement for texture references bound to pitched memory
    int deviceOverlap;               // device can copy memory and run a kernel concurrently; deprecated, use asyncEngineCount
    int multiProcessorCount;         // number of multiprocessors on the device
    int kernelExecTimeoutEnabled;    // whether there is a run time limit on kernels
    int integrated;                  // device is integrated as opposed to discrete
    int canMapHostMemory;            // device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
    int computeMode;                 // compute mode (see ::cudaComputeMode)

    // ---- texture limits ----
    int maxTexture1D;                // maximum 1D texture size
    int maxTexture1DMipmap;          // maximum 1D mipmapped texture size
    int maxTexture1DLinear;          // deprecated, do not use; use cudaDeviceGetTexture1DLinearMaxWidth() or cuDeviceGetTexture1DLinearMaxWidth()
    int maxTexture2D[2];             // maximum 2D texture dimensions
    int maxTexture2DMipmap[2];       // maximum 2D mipmapped texture dimensions
    int maxTexture2DLinear[3];       // maximum (width, height, pitch) for 2D textures bound to pitched memory
    int maxTexture2DGather[2];       // maximum 2D texture dimensions when texture gather operations are performed
    int maxTexture3D[3];             // maximum 3D texture dimensions
    int maxTexture3DAlt[3];          // maximum alternate 3D texture dimensions
    int maxTextureCubemap;           // maximum cubemap texture dimensions
    int maxTexture1DLayered[2];      // maximum 1D layered texture dimensions
    int maxTexture2DLayered[3];      // maximum 2D layered texture dimensions
    int maxTextureCubemapLayered[2]; // maximum cubemap layered texture dimensions

    // ---- surface limits ----
    int maxSurface1D;                // maximum 1D surface size
    int maxSurface2D[2];             // maximum 2D surface dimensions
    int maxSurface3D[3];             // maximum 3D surface dimensions
    int maxSurface1DLayered[2];      // maximum 1D layered surface dimensions
    int maxSurface2DLayered[3];      // maximum 2D layered surface dimensions
    int maxSurfaceCubemap;           // maximum cubemap surface dimensions
    int maxSurfaceCubemapLayered[2]; // maximum cubemap layered surface dimensions
    size_t surfaceAlignment;         // alignment requirements for surfaces

    // ---- capabilities, bus location, clocks and caches ----
    int concurrentKernels;           // device can possibly execute multiple kernels concurrently
    int ECCEnabled;                  // device has ECC support enabled
    int pciBusID;                    // PCI bus ID of the device
    int pciDeviceID;                 // PCI device ID of the device
    int pciDomainID;                 // PCI domain ID of the device
    int tccDriver;                   // 1 if the device is a Tesla device using the TCC driver, 0 otherwise
    int asyncEngineCount;            // number of asynchronous engines
    int unifiedAddressing;           // device shares a unified address space with the host
    int memoryClockRate;             // peak memory clock frequency, in kilohertz
    int memoryBusWidth;              // global memory bus width, in bits
    int l2CacheSize;                 // size of the L2 cache, in bytes
    int persistingL2CacheMaxSize;    // device's maximum L2 persisting lines capacity setting, in bytes
    int maxThreadsPerMultiProcessor; // maximum resident threads per multiprocessor
    int streamPrioritiesSupported;   // device supports stream priorities
    int globalL1CacheSupported;      // device supports caching globals in L1
    int localL1CacheSupported;       // device supports caching locals in L1
    size_t sharedMemPerMultiprocessor; // shared memory available per multiprocessor, in bytes
    int regsPerMultiprocessor;       // 32-bit registers available per multiprocessor

    // ---- managed memory, host interaction and cooperative launch ----
    int managedMemory;               // device supports allocating managed memory on this system
    int isMultiGpuBoard;             // device is on a multi-GPU board
    int multiGpuBoardGroupID;        // unique identifier for a group of devices on the same multi-GPU board
    int hostNativeAtomicSupported;   // link between the device and the host supports native atomic operations
    int singleToDoublePrecisionPerfRatio; // ratio of single precision to double precision performance (floating-point operations per second)
    int pageableMemoryAccess;        // device coherently accesses pageable memory without cudaHostRegister
    int concurrentManagedAccess;     // device can coherently access managed memory concurrently with the CPU
    int computePreemptionSupported;  // device supports Compute Preemption
    int canUseHostPointerForRegisteredMem; // device can access host registered memory at the same virtual address as the CPU
    int cooperativeLaunch;           // device supports launching cooperative kernels via ::cudaLaunchCooperativeKernel
    int cooperativeMultiDeviceLaunch; // deprecated; cudaLaunchCooperativeKernelMultiDevice is deprecated
    size_t sharedMemPerBlockOptin;   // per-device maximum shared memory per block usable by special opt in
    int pageableMemoryAccessUsesHostPageTables; // device accesses pageable memory via the host's page tables
    int directManagedMemAccessFromHost; // host can directly access managed memory on the device without migration
    int maxBlocksPerMultiProcessor;  // maximum number of resident blocks per multiprocessor
    int accessPolicyMaxWindowSize;   // maximum value of ::cudaAccessPolicyWindow::num_bytes
    size_t reservedSharedMemPerBlock; // shared memory reserved by the CUDA driver per block, in bytes
  } cudaDeviceProp_t;
  115. typedef struct cudart_handle {
  116. void *handle;
  117. uint16_t verbose;
  118. cudartReturn_t (*cudaSetDevice)(int device);
  119. cudartReturn_t (*cudaDeviceSynchronize)(void);
  120. cudartReturn_t (*cudaDeviceReset)(void);
  121. cudartReturn_t (*cudaMemGetInfo)(size_t *, size_t *);
  122. cudartReturn_t (*cudaGetDeviceCount)(int *);
  123. cudartReturn_t (*cudaDeviceGetAttribute)(int* value, cudartDeviceAttr_t attr, int device);
  124. cudartReturn_t (*cudaDriverGetVersion) (int *driverVersion);
  125. cudartReturn_t (*cudaGetDeviceProperties) (cudaDeviceProp_t* prop, int device);
  126. } cudart_handle_t;
  127. typedef struct cudart_init_resp {
  128. char *err; // If err is non-null handle is invalid
  129. cudart_handle_t ch;
  130. int num_devices;
  131. } cudart_init_resp_t;
  132. void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp);
  133. void cudart_check_vram(cudart_handle_t ch, int device_id, mem_info_t *resp);
  134. void cudart_release(cudart_handle_t ch);
  135. #endif // __GPU_INFO_CUDART_H__
  136. #endif // __APPLE__