gpu_info_rocm.c 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. #ifndef __APPLE__
  2. #include "gpu_info_rocm.h"
  3. #include <string.h>
  4. void rocm_init(char *rocm_lib_path, rocm_init_resp_t *resp) {
  5. rsmi_status_t ret;
  6. resp->err = NULL;
  7. const int buflen = 256;
  8. char buf[buflen + 1];
  9. int i;
  10. struct lookup {
  11. char *s;
  12. void **p;
  13. } l[4] = {
  14. {"rsmi_init", (void *)&resp->rh.initFn},
  15. {"rsmi_shut_down", (void *)&resp->rh.shutdownFn},
  16. {"rsmi_dev_memory_total_get", (void *)&resp->rh.totalMemFn},
  17. {"rsmi_dev_memory_usage_get", (void *)&resp->rh.usageMemFn},
  18. // { "rsmi_dev_id_get", (void*)&resp->rh.getHandle },
  19. };
  20. resp->rh.handle = LOAD_LIBRARY(rocm_lib_path, RTLD_LAZY);
  21. if (!resp->rh.handle) {
  22. char *msg = LOAD_ERR();
  23. snprintf(buf, buflen,
  24. "Unable to load %s library to query for Radeon GPUs: %s\n",
  25. rocm_lib_path, msg);
  26. free(msg);
  27. resp->err = strdup(buf);
  28. return;
  29. }
  30. for (i = 0; i < 4; i++) {
  31. *l[i].p = LOAD_SYMBOL(resp->rh.handle, l[i].s);
  32. if (!l[i].p) {
  33. UNLOAD_LIBRARY(resp->rh.handle);
  34. resp->rh.handle = NULL;
  35. char *msg = LOAD_ERR();
  36. snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s,
  37. msg);
  38. free(msg);
  39. resp->err = strdup(buf);
  40. return;
  41. }
  42. }
  43. ret = (*resp->rh.initFn)(0);
  44. if (ret != RSMI_STATUS_SUCCESS) {
  45. UNLOAD_LIBRARY(resp->rh.handle);
  46. resp->rh.handle = NULL;
  47. snprintf(buf, buflen, "rocm vram init failure: %d", ret);
  48. resp->err = strdup(buf);
  49. }
  50. return;
  51. }
  52. void rocm_check_vram(rocm_handle_t h, mem_info_t *resp) {
  53. resp->err = NULL;
  54. // uint32_t num_devices;
  55. // uint16_t device;
  56. uint64_t totalMem = 0;
  57. uint64_t usedMem = 0;
  58. rsmi_status_t ret;
  59. const int buflen = 256;
  60. char buf[buflen + 1];
  61. int i;
  62. if (h.handle == NULL) {
  63. resp->err = strdup("rocm handle not initialized");
  64. return;
  65. }
  66. // TODO - iterate through devices... ret =
  67. // rsmi_num_monitor_devices(&num_devices);
  68. // ret = (*h.getHandle)(0, &device);
  69. // if (ret != RSMI_STATUS_SUCCESS) {
  70. // printf("rocm vram device lookup failure: %d\n", ret);
  71. // return -1;
  72. // }
  73. // Get total memory - used memory for available memory
  74. ret = (*h.totalMemFn)(0, RSMI_MEM_TYPE_VRAM, &totalMem);
  75. if (ret != RSMI_STATUS_SUCCESS) {
  76. snprintf(buf, buflen, "rocm total mem lookup failure: %d", ret);
  77. resp->err = strdup(buf);
  78. return;
  79. }
  80. ret = (*h.usageMemFn)(0, RSMI_MEM_TYPE_VRAM, &usedMem);
  81. if (ret != RSMI_STATUS_SUCCESS) {
  82. snprintf(buf, buflen, "rocm usage mem lookup failure: %d", ret);
  83. resp->err = strdup(buf);
  84. return;
  85. }
  86. // TODO: set this to the actual number of devices
  87. resp->count = 1;
  88. resp->total = totalMem;
  89. resp->free = totalMem - usedMem;
  90. return;
  91. }
  92. #endif // __APPLE__