gpu_info_oneapi.c 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. #ifndef __APPLE__
  2. #include "gpu_info_oneapi.h"
  3. #include <string.h>
  4. void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp)
  5. {
  6. ze_result_t ret;
  7. resp->err = NULL;
  8. resp->oh.devices = NULL;
  9. resp->oh.num_devices = NULL;
  10. resp->oh.drivers = NULL;
  11. resp->oh.num_drivers = 0;
  12. const int buflen = 256;
  13. char buf[buflen + 1];
  14. int i, d, count;
  15. struct lookup
  16. {
  17. char *s;
  18. void **p;
  19. } l[] = {
  20. {"zesInit", (void *)&resp->oh.zesInit},
  21. {"zesDriverGet", (void *)&resp->oh.zesDriverGet},
  22. {"zesDeviceGet", (void *)&resp->oh.zesDeviceGet},
  23. {"zesDeviceGetProperties", (void *)&resp->oh.zesDeviceGetProperties},
  24. {"zesDeviceEnumMemoryModules",
  25. (void *)&resp->oh.zesDeviceEnumMemoryModules},
  26. {"zesMemoryGetProperties", (void *)&resp->oh.zesMemoryGetProperties},
  27. {"zesMemoryGetState", (void *)&resp->oh.zesMemoryGetState},
  28. {NULL, NULL},
  29. };
  30. resp->oh.handle = LOAD_LIBRARY(oneapi_lib_path, RTLD_LAZY);
  31. if (!resp->oh.handle)
  32. {
  33. char *msg = LOAD_ERR();
  34. snprintf(buf, buflen,
  35. "Unable to load %s library to query for Intel GPUs: %s\n",
  36. oneapi_lib_path, msg);
  37. free(msg);
  38. resp->err = strdup(buf);
  39. return;
  40. }
  41. // TODO once we've squashed the remaining corner cases remove this log
  42. LOG(resp->oh.verbose,
  43. "wiring Level-Zero management library functions in %s\n",
  44. oneapi_lib_path);
  45. for (i = 0; l[i].s != NULL; i++)
  46. {
  47. // TODO once we've squashed the remaining corner cases remove this log
  48. LOG(resp->oh.verbose, "dlsym: %s\n", l[i].s);
  49. *l[i].p = LOAD_SYMBOL(resp->oh.handle, l[i].s);
  50. if (!l[i].p)
  51. {
  52. resp->oh.handle = NULL;
  53. char *msg = LOAD_ERR();
  54. LOG(resp->oh.verbose, "dlerr: %s\n", msg);
  55. UNLOAD_LIBRARY(resp->oh.handle);
  56. snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s, msg);
  57. free(msg);
  58. resp->err = strdup(buf);
  59. return;
  60. }
  61. }
  62. ret = (*resp->oh.zesInit)(0);
  63. if (ret != ZE_RESULT_SUCCESS)
  64. {
  65. LOG(resp->oh.verbose, "zesInit err: %x\n", ret);
  66. snprintf(buf, buflen, "oneapi vram init failure: %x", ret);
  67. resp->err = strdup(buf);
  68. oneapi_release(resp->oh);
  69. return;
  70. }
  71. count = 0;
  72. ret = (*resp->oh.zesDriverGet)(&resp->oh.num_drivers, NULL);
  73. if (ret != ZE_RESULT_SUCCESS)
  74. {
  75. LOG(resp->oh.verbose, "zesDriverGet err: %x\n", ret);
  76. snprintf(buf, buflen, "unable to get driver count: %x", ret);
  77. resp->err = strdup(buf);
  78. oneapi_release(resp->oh);
  79. return;
  80. }
  81. LOG(resp->oh.verbose, "oneapi driver count: %d\n", resp->oh.num_drivers);
  82. resp->oh.drivers = malloc(resp->oh.num_drivers * sizeof(zes_driver_handle_t));
  83. resp->oh.num_devices = malloc(resp->oh.num_drivers * sizeof(uint32_t));
  84. memset(&resp->oh.num_devices[0], 0, resp->oh.num_drivers * sizeof(uint32_t));
  85. resp->oh.devices = malloc(resp->oh.num_drivers * sizeof(zes_device_handle_t*));
  86. ret = (*resp->oh.zesDriverGet)(&resp->oh.num_drivers, &resp->oh.drivers[0]);
  87. if (ret != ZE_RESULT_SUCCESS)
  88. {
  89. LOG(resp->oh.verbose, "zesDriverGet err: %x\n", ret);
  90. snprintf(buf, buflen, "unable to get driver count: %x", ret);
  91. resp->err = strdup(buf);
  92. oneapi_release(resp->oh);
  93. return;
  94. }
  95. for (d = 0; d < resp->oh.num_drivers; d++) {
  96. ret = (*resp->oh.zesDeviceGet)(resp->oh.drivers[d], &resp->oh.num_devices[d], NULL);
  97. if (ret != ZE_RESULT_SUCCESS)
  98. {
  99. LOG(resp->oh.verbose, "zesDeviceGet err: %x\n", ret);
  100. snprintf(buf, buflen, "unable to get device count: %x", ret);
  101. resp->err = strdup(buf);
  102. oneapi_release(resp->oh);
  103. return;
  104. }
  105. resp->oh.devices[d] = malloc(resp->oh.num_devices[d] * sizeof(zes_device_handle_t));
  106. ret = (*resp->oh.zesDeviceGet)(resp->oh.drivers[d], &resp->oh.num_devices[d], resp->oh.devices[d]);
  107. if (ret != ZE_RESULT_SUCCESS)
  108. {
  109. LOG(resp->oh.verbose, "zesDeviceGet err: %x\n", ret);
  110. snprintf(buf, buflen, "unable to get device count: %x", ret);
  111. resp->err = strdup(buf);
  112. oneapi_release(resp->oh);
  113. return;
  114. }
  115. count += resp->oh.num_devices[d];
  116. }
  117. return;
  118. }
  119. void oneapi_check_vram(oneapi_handle_t h, int driver, int device, mem_info_t *resp)
  120. {
  121. ze_result_t ret;
  122. resp->err = NULL;
  123. uint64_t totalMem = 0;
  124. uint64_t usedMem = 0;
  125. const int buflen = 256;
  126. char buf[buflen + 1];
  127. int i, d, m;
  128. if (h.handle == NULL)
  129. {
  130. resp->err = strdup("Level-Zero handle not initialized");
  131. return;
  132. }
  133. if (driver > h.num_drivers || device > h.num_devices[driver]) {
  134. resp->err = strdup("driver of device index out of bounds");
  135. return;
  136. }
  137. resp->total = 0;
  138. resp->free = 0;
  139. zes_device_ext_properties_t ext_props;
  140. ext_props.stype = ZES_STRUCTURE_TYPE_DEVICE_EXT_PROPERTIES;
  141. ext_props.pNext = NULL;
  142. zes_device_properties_t props;
  143. props.stype = ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES;
  144. props.pNext = &ext_props;
  145. ret = (*h.zesDeviceGetProperties)(h.devices[driver][device], &props);
  146. if (ret != ZE_RESULT_SUCCESS)
  147. {
  148. snprintf(buf, buflen, "unable to get device properties: %d", ret);
  149. resp->err = strdup(buf);
  150. return;
  151. }
  152. snprintf(&resp->gpu_name[0], GPU_NAME_LEN, props.modelName);
  153. // TODO this needs to map to ONEAPI_DEVICE_SELECTOR syntax
  154. // (this is probably wrong...)
  155. // TODO - the driver isn't included - what if there are multiple drivers?
  156. snprintf(&resp->gpu_id[0], GPU_ID_LEN, "%d", device);
  157. if (h.verbose)
  158. {
  159. // When in verbose mode, report more information about
  160. // the card we discover.
  161. LOG(h.verbose, "[%d:%d] oneAPI device name: %s\n", driver, device,
  162. props.modelName);
  163. LOG(h.verbose, "[%d:%d] oneAPI brand: %s\n", driver, device,
  164. props.brandName);
  165. LOG(h.verbose, "[%d:%d] oneAPI vendor: %s\n", driver, device,
  166. props.vendorName);
  167. LOG(h.verbose, "[%d:%d] oneAPI S/N: %s\n", driver, device,
  168. props.serialNumber);
  169. LOG(h.verbose, "[%d:%d] oneAPI board number: %s\n", driver, device,
  170. props.boardNumber);
  171. }
  172. // TODO
  173. // Compute Capability equivalent in resp->major, resp->minor, resp->patch
  174. uint32_t memCount = 0;
  175. ret = (*h.zesDeviceEnumMemoryModules)(h.devices[driver][device], &memCount, NULL);
  176. if (ret != ZE_RESULT_SUCCESS)
  177. {
  178. snprintf(buf, buflen,
  179. "unable to enumerate Level-Zero memory modules: %x", ret);
  180. resp->err = strdup(buf);
  181. return;
  182. }
  183. LOG(h.verbose, "discovered %d Level-Zero memory modules\n", memCount);
  184. zes_mem_handle_t *mems = malloc(memCount * sizeof(zes_mem_handle_t));
  185. (*h.zesDeviceEnumMemoryModules)(h.devices[driver][device], &memCount, mems);
  186. for (m = 0; m < memCount; m++)
  187. {
  188. zes_mem_state_t state;
  189. state.stype = ZES_STRUCTURE_TYPE_MEM_STATE;
  190. state.pNext = NULL;
  191. ret = (*h.zesMemoryGetState)(mems[m], &state);
  192. if (ret != ZE_RESULT_SUCCESS)
  193. {
  194. snprintf(buf, buflen, "unable to get memory state: %x", ret);
  195. resp->err = strdup(buf);
  196. free(mems);
  197. return;
  198. }
  199. resp->total += state.size;
  200. resp->free += state.free;
  201. }
  202. free(mems);
  203. }
  204. void oneapi_release(oneapi_handle_t h)
  205. {
  206. int d;
  207. LOG(h.verbose, "releasing oneapi library\n");
  208. for (d = 0; d < h.num_drivers; d++)
  209. {
  210. if (h.devices != NULL && h.devices[d] != NULL)
  211. {
  212. free(h.devices[d]);
  213. }
  214. }
  215. if (h.devices != NULL)
  216. {
  217. free(h.devices);
  218. h.devices = NULL;
  219. }
  220. if (h.num_devices != NULL)
  221. {
  222. free(h.num_devices);
  223. h.num_devices = NULL;
  224. }
  225. if (h.drivers != NULL)
  226. {
  227. free(h.drivers);
  228. h.drivers = NULL;
  229. }
  230. h.num_drivers = 0;
  231. UNLOAD_LIBRARY(h.handle);
  232. h.handle = NULL;
  233. }
  234. int oneapi_get_device_count(oneapi_handle_t h, int driver)
  235. {
  236. if (h.handle == NULL || h.num_devices == NULL)
  237. {
  238. return 0;
  239. }
  240. if (driver > h.num_drivers)
  241. {
  242. return 0;
  243. }
  244. return (int)h.num_devices[driver];
  245. }
  246. #endif // __APPLE__