gpu_info_oneapi.c 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. #ifndef __APPLE__
  2. #include "gpu_info_oneapi.h"
  3. #include <string.h>
  4. void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp)
  5. {
  6. ze_result_t ret;
  7. resp->err = NULL;
  8. const int buflen = 256;
  9. char buf[buflen + 1];
  10. int i;
  11. struct lookup
  12. {
  13. char *s;
  14. void **p;
  15. } l[] = {
  16. {"zesInit", (void *)&resp->oh.zesInit},
  17. {"zesDriverGet", (void *)&resp->oh.zesDriverGet},
  18. {"zesDeviceGet", (void *)&resp->oh.zesDeviceGet},
  19. {"zesDeviceGetProperties", (void *)&resp->oh.zesDeviceGetProperties},
  20. {"zesDeviceEnumMemoryModules",
  21. (void *)&resp->oh.zesDeviceEnumMemoryModules},
  22. {"zesMemoryGetProperties", (void *)&resp->oh.zesMemoryGetProperties},
  23. {"zesMemoryGetState", (void *)&resp->oh.zesMemoryGetState},
  24. {NULL, NULL},
  25. };
  26. resp->oh.handle = LOAD_LIBRARY(oneapi_lib_path, RTLD_LAZY);
  27. if (!resp->oh.handle)
  28. {
  29. char *msg = LOAD_ERR();
  30. snprintf(buf, buflen,
  31. "Unable to load %s library to query for Intel GPUs: %s\n",
  32. oneapi_lib_path, msg);
  33. free(msg);
  34. resp->err = strdup(buf);
  35. return;
  36. }
  37. // TODO once we've squashed the remaining corner cases remove this log
  38. LOG(resp->oh.verbose,
  39. "wiring Level-Zero management library functions in %s\n",
  40. oneapi_lib_path);
  41. for (i = 0; l[i].s != NULL; i++)
  42. {
  43. // TODO once we've squashed the remaining corner cases remove this log
  44. LOG(resp->oh.verbose, "dlsym: %s\n", l[i].s);
  45. *l[i].p = LOAD_SYMBOL(resp->oh.handle, l[i].s);
  46. if (!l[i].p)
  47. {
  48. resp->oh.handle = NULL;
  49. char *msg = LOAD_ERR();
  50. LOG(resp->oh.verbose, "dlerr: %s\n", msg);
  51. UNLOAD_LIBRARY(resp->oh.handle);
  52. snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s, msg);
  53. free(msg);
  54. resp->err = strdup(buf);
  55. return;
  56. }
  57. }
  58. ret = (*resp->oh.zesInit)(0);
  59. if (ret != ZE_RESULT_SUCCESS)
  60. {
  61. LOG(resp->oh.verbose, "zesInit err: %d\n", ret);
  62. UNLOAD_LIBRARY(resp->oh.handle);
  63. resp->oh.handle = NULL;
  64. snprintf(buf, buflen, "oneapi vram init failure: %d", ret);
  65. resp->err = strdup(buf);
  66. }
  67. (*resp->oh.zesDriverGet)(&resp->num_devices, NULL);
  68. return;
  69. }
  70. void oneapi_check_vram(oneapi_handle_t h, mem_info_t *resp)
  71. {
  72. ze_result_t ret;
  73. resp->err = NULL;
  74. uint64_t totalMem = 0;
  75. uint64_t usedMem = 0;
  76. const int buflen = 256;
  77. char buf[buflen + 1];
  78. int i, d, m;
  79. if (h.handle == NULL)
  80. {
  81. resp->err = strdup("Level-Zero handle not initialized");
  82. return;
  83. }
  84. uint32_t driversCount = 0;
  85. ret = (*h.zesDriverGet)(&driversCount, NULL);
  86. if (ret != ZE_RESULT_SUCCESS)
  87. {
  88. snprintf(buf, buflen, "unable to get driver count: %d", ret);
  89. resp->err = strdup(buf);
  90. return;
  91. }
  92. LOG(h.verbose, "discovered %d Level-Zero drivers\n", driversCount);
  93. zes_driver_handle_t *allDrivers =
  94. malloc(driversCount * sizeof(zes_driver_handle_t));
  95. (*h.zesDriverGet)(&driversCount, allDrivers);
  96. resp->total = 0;
  97. resp->free = 0;
  98. for (d = 0; d < driversCount; d++)
  99. {
  100. uint32_t deviceCount = 0;
  101. ret = (*h.zesDeviceGet)(allDrivers[d], &deviceCount, NULL);
  102. if (ret != ZE_RESULT_SUCCESS)
  103. {
  104. snprintf(buf, buflen, "unable to get device count: %d", ret);
  105. resp->err = strdup(buf);
  106. free(allDrivers);
  107. return;
  108. }
  109. LOG(h.verbose, "discovered %d Level-Zero devices\n", deviceCount);
  110. zes_device_handle_t *devices =
  111. malloc(deviceCount * sizeof(zes_device_handle_t));
  112. (*h.zesDeviceGet)(allDrivers[d], &deviceCount, devices);
  113. for (i = 0; i < deviceCount; i++)
  114. {
  115. zes_device_ext_properties_t ext_props;
  116. ext_props.stype = ZES_STRUCTURE_TYPE_DEVICE_EXT_PROPERTIES;
  117. ext_props.pNext = NULL;
  118. zes_device_properties_t props;
  119. props.stype = ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES;
  120. props.pNext = &ext_props;
  121. ret = (*h.zesDeviceGetProperties)(devices[i], &props);
  122. if (ret != ZE_RESULT_SUCCESS)
  123. {
  124. snprintf(buf, buflen, "unable to get device properties: %d", ret);
  125. resp->err = strdup(buf);
  126. free(allDrivers);
  127. free(devices);
  128. return;
  129. }
  130. if (h.verbose)
  131. {
  132. // When in verbose mode, report more information about
  133. // the card we discover.
  134. LOG(h.verbose, "[%d] oneAPI device name: %s\n", i,
  135. props.modelName);
  136. LOG(h.verbose, "[%d] oneAPI brand: %s\n", i,
  137. props.brandName);
  138. LOG(h.verbose, "[%d] oneAPI vendor: %s\n", i,
  139. props.vendorName);
  140. LOG(h.verbose, "[%d] oneAPI S/N: %s\n", i,
  141. props.serialNumber);
  142. LOG(h.verbose, "[%d] oneAPI board number: %s\n", i,
  143. props.boardNumber);
  144. }
  145. uint32_t memCount = 0;
  146. ret = (*h.zesDeviceEnumMemoryModules)(devices[i], &memCount, NULL);
  147. if (ret != ZE_RESULT_SUCCESS)
  148. {
  149. snprintf(buf, buflen,
  150. "unable to enumerate Level-Zero memory modules: %d", ret);
  151. resp->err = strdup(buf);
  152. free(allDrivers);
  153. free(devices);
  154. return;
  155. }
  156. LOG(h.verbose, "discovered %d Level-Zero memory modules\n", memCount);
  157. zes_mem_handle_t *mems = malloc(memCount * sizeof(zes_mem_handle_t));
  158. (*h.zesDeviceEnumMemoryModules)(devices[i], &memCount, mems);
  159. for (m = 0; m < memCount; m++)
  160. {
  161. zes_mem_state_t state;
  162. state.stype = ZES_STRUCTURE_TYPE_MEM_STATE;
  163. state.pNext = NULL;
  164. ret = (*h.zesMemoryGetState)(mems[m], &state);
  165. if (ret != ZE_RESULT_SUCCESS)
  166. {
  167. snprintf(buf, buflen, "unable to get memory state: %d", ret);
  168. resp->err = strdup(buf);
  169. free(allDrivers);
  170. free(devices);
  171. free(mems);
  172. return;
  173. }
  174. resp->total += state.size;
  175. resp->free += state.free;
  176. }
  177. free(mems);
  178. }
  179. free(devices);
  180. }
  181. free(allDrivers);
  182. }
  183. #endif // __APPLE__