gpu_info_oneapi.c 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. #ifndef __APPLE__
  2. #include "gpu_info_oneapi.h"
  3. #include <string.h>
  4. void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp) {
  5. ze_result_t ret;
  6. resp->err = NULL;
  7. resp->oh.devices = NULL;
  8. resp->oh.num_devices = NULL;
  9. resp->oh.drivers = NULL;
  10. resp->oh.num_drivers = 0;
  11. const int buflen = 256;
  12. char buf[buflen + 1];
  13. int i, d, count;
  14. struct lookup {
  15. char *s;
  16. void **p;
  17. } l[] = {
  18. {"zesInit", (void *)&resp->oh.zesInit},
  19. {"zesDriverGet", (void *)&resp->oh.zesDriverGet},
  20. {"zesDeviceGet", (void *)&resp->oh.zesDeviceGet},
  21. {"zesDeviceGetProperties", (void *)&resp->oh.zesDeviceGetProperties},
  22. {"zesDeviceEnumMemoryModules",
  23. (void *)&resp->oh.zesDeviceEnumMemoryModules},
  24. {"zesMemoryGetProperties", (void *)&resp->oh.zesMemoryGetProperties},
  25. {"zesMemoryGetState", (void *)&resp->oh.zesMemoryGetState},
  26. {NULL, NULL},
  27. };
  28. resp->oh.handle = LOAD_LIBRARY(oneapi_lib_path, RTLD_LAZY);
  29. if (!resp->oh.handle) {
  30. char *msg = LOAD_ERR();
  31. snprintf(buf, buflen,
  32. "Unable to load %s library to query for Intel GPUs: %s\n",
  33. oneapi_lib_path, msg);
  34. free(msg);
  35. resp->err = strdup(buf);
  36. return;
  37. }
  38. // TODO once we've squashed the remaining corner cases remove this log
  39. LOG(resp->oh.verbose,
  40. "wiring Level-Zero management library functions in %s\n",
  41. oneapi_lib_path);
  42. for (i = 0; l[i].s != NULL; i++) {
  43. // TODO once we've squashed the remaining corner cases remove this log
  44. LOG(resp->oh.verbose, "dlsym: %s\n", l[i].s);
  45. *l[i].p = LOAD_SYMBOL(resp->oh.handle, l[i].s);
  46. if (!l[i].p) {
  47. resp->oh.handle = NULL;
  48. char *msg = LOAD_ERR();
  49. LOG(resp->oh.verbose, "dlerr: %s\n", msg);
  50. UNLOAD_LIBRARY(resp->oh.handle);
  51. snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s, msg);
  52. free(msg);
  53. resp->err = strdup(buf);
  54. return;
  55. }
  56. }
  57. ret = (*resp->oh.zesInit)(0);
  58. if (ret != ZE_RESULT_SUCCESS) {
  59. LOG(resp->oh.verbose, "zesInit err: %x\n", ret);
  60. snprintf(buf, buflen, "oneapi vram init failure: %x", ret);
  61. resp->err = strdup(buf);
  62. oneapi_release(resp->oh);
  63. return;
  64. }
  65. count = 0;
  66. ret = (*resp->oh.zesDriverGet)(&resp->oh.num_drivers, NULL);
  67. if (ret != ZE_RESULT_SUCCESS) {
  68. LOG(resp->oh.verbose, "zesDriverGet err: %x\n", ret);
  69. snprintf(buf, buflen, "unable to get driver count: %x", ret);
  70. resp->err = strdup(buf);
  71. oneapi_release(resp->oh);
  72. return;
  73. }
  74. LOG(resp->oh.verbose, "oneapi driver count: %d\n", resp->oh.num_drivers);
  75. resp->oh.drivers = malloc(resp->oh.num_drivers * sizeof(zes_driver_handle_t));
  76. resp->oh.num_devices = malloc(resp->oh.num_drivers * sizeof(uint32_t));
  77. memset(&resp->oh.num_devices[0], 0, resp->oh.num_drivers * sizeof(uint32_t));
  78. resp->oh.devices =
  79. malloc(resp->oh.num_drivers * sizeof(zes_device_handle_t *));
  80. ret = (*resp->oh.zesDriverGet)(&resp->oh.num_drivers, &resp->oh.drivers[0]);
  81. if (ret != ZE_RESULT_SUCCESS) {
  82. LOG(resp->oh.verbose, "zesDriverGet err: %x\n", ret);
  83. snprintf(buf, buflen, "unable to get driver count: %x", ret);
  84. resp->err = strdup(buf);
  85. oneapi_release(resp->oh);
  86. return;
  87. }
  88. for (d = 0; d < resp->oh.num_drivers; d++) {
  89. ret = (*resp->oh.zesDeviceGet)(resp->oh.drivers[d],
  90. &resp->oh.num_devices[d], NULL);
  91. if (ret != ZE_RESULT_SUCCESS) {
  92. LOG(resp->oh.verbose, "zesDeviceGet err: %x\n", ret);
  93. snprintf(buf, buflen, "unable to get device count: %x", ret);
  94. resp->err = strdup(buf);
  95. oneapi_release(resp->oh);
  96. return;
  97. }
  98. resp->oh.devices[d] =
  99. malloc(resp->oh.num_devices[d] * sizeof(zes_device_handle_t));
  100. ret = (*resp->oh.zesDeviceGet)(
  101. resp->oh.drivers[d], &resp->oh.num_devices[d], resp->oh.devices[d]);
  102. if (ret != ZE_RESULT_SUCCESS) {
  103. LOG(resp->oh.verbose, "zesDeviceGet err: %x\n", ret);
  104. snprintf(buf, buflen, "unable to get device count: %x", ret);
  105. resp->err = strdup(buf);
  106. oneapi_release(resp->oh);
  107. return;
  108. }
  109. count += resp->oh.num_devices[d];
  110. }
  111. return;
  112. }
  113. void oneapi_check_vram(oneapi_handle_t h, int driver, int device,
  114. mem_info_t *resp) {
  115. ze_result_t ret;
  116. resp->err = NULL;
  117. uint64_t totalMem = 0;
  118. uint64_t usedMem = 0;
  119. const int buflen = 256;
  120. char buf[buflen + 1];
  121. int i, d, m;
  122. if (h.handle == NULL) {
  123. resp->err = strdup("Level-Zero handle not initialized");
  124. return;
  125. }
  126. if (driver > h.num_drivers || device > h.num_devices[driver]) {
  127. resp->err = strdup("driver of device index out of bounds");
  128. return;
  129. }
  130. resp->total = 0;
  131. resp->free = 0;
  132. zes_device_ext_properties_t ext_props;
  133. ext_props.stype = ZES_STRUCTURE_TYPE_DEVICE_EXT_PROPERTIES;
  134. ext_props.pNext = NULL;
  135. zes_device_properties_t props;
  136. props.stype = ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES;
  137. props.pNext = &ext_props;
  138. ret = (*h.zesDeviceGetProperties)(h.devices[driver][device], &props);
  139. if (ret != ZE_RESULT_SUCCESS) {
  140. snprintf(buf, buflen, "unable to get device properties: %d", ret);
  141. resp->err = strdup(buf);
  142. return;
  143. }
  144. snprintf(&resp->gpu_name[0], GPU_NAME_LEN, props.modelName);
  145. // TODO this needs to map to ONEAPI_DEVICE_SELECTOR syntax
  146. // (this is probably wrong...)
  147. // TODO - the driver isn't included - what if there are multiple drivers?
  148. snprintf(&resp->gpu_id[0], GPU_ID_LEN, "%d", device);
  149. if (h.verbose) {
  150. // When in verbose mode, report more information about
  151. // the card we discover.
  152. LOG(h.verbose, "[%d:%d] oneAPI device name: %s\n", driver, device,
  153. props.modelName);
  154. LOG(h.verbose, "[%d:%d] oneAPI brand: %s\n", driver, device,
  155. props.brandName);
  156. LOG(h.verbose, "[%d:%d] oneAPI vendor: %s\n", driver, device,
  157. props.vendorName);
  158. LOG(h.verbose, "[%d:%d] oneAPI S/N: %s\n", driver, device,
  159. props.serialNumber);
  160. LOG(h.verbose, "[%d:%d] oneAPI board number: %s\n", driver, device,
  161. props.boardNumber);
  162. }
  163. // TODO
  164. // Compute Capability equivalent in resp->major, resp->minor, resp->patch
  165. uint32_t memCount = 0;
  166. ret = (*h.zesDeviceEnumMemoryModules)(h.devices[driver][device], &memCount,
  167. NULL);
  168. if (ret != ZE_RESULT_SUCCESS) {
  169. snprintf(buf, buflen, "unable to enumerate Level-Zero memory modules: %x",
  170. ret);
  171. resp->err = strdup(buf);
  172. return;
  173. }
  174. LOG(h.verbose, "discovered %d Level-Zero memory modules\n", memCount);
  175. zes_mem_handle_t *mems = malloc(memCount * sizeof(zes_mem_handle_t));
  176. (*h.zesDeviceEnumMemoryModules)(h.devices[driver][device], &memCount, mems);
  177. for (m = 0; m < memCount; m++) {
  178. zes_mem_state_t state;
  179. state.stype = ZES_STRUCTURE_TYPE_MEM_STATE;
  180. state.pNext = NULL;
  181. ret = (*h.zesMemoryGetState)(mems[m], &state);
  182. if (ret != ZE_RESULT_SUCCESS) {
  183. snprintf(buf, buflen, "unable to get memory state: %x", ret);
  184. resp->err = strdup(buf);
  185. free(mems);
  186. return;
  187. }
  188. resp->total += state.size;
  189. resp->free += state.free;
  190. }
  191. free(mems);
  192. }
  193. void oneapi_release(oneapi_handle_t h) {
  194. int d;
  195. LOG(h.verbose, "releasing oneapi library\n");
  196. for (d = 0; d < h.num_drivers; d++) {
  197. if (h.devices != NULL && h.devices[d] != NULL) {
  198. free(h.devices[d]);
  199. }
  200. }
  201. if (h.devices != NULL) {
  202. free(h.devices);
  203. h.devices = NULL;
  204. }
  205. if (h.num_devices != NULL) {
  206. free(h.num_devices);
  207. h.num_devices = NULL;
  208. }
  209. if (h.drivers != NULL) {
  210. free(h.drivers);
  211. h.drivers = NULL;
  212. }
  213. h.num_drivers = 0;
  214. UNLOAD_LIBRARY(h.handle);
  215. h.handle = NULL;
  216. }
  217. int oneapi_get_device_count(oneapi_handle_t h, int driver) {
  218. if (h.handle == NULL || h.num_devices == NULL) {
  219. return 0;
  220. }
  221. if (driver > h.num_drivers) {
  222. return 0;
  223. }
  224. return (int)h.num_devices[driver];
  225. }
  226. #endif // __APPLE__