gpu_info_cudart.c 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. #ifndef __APPLE__ // TODO - maybe consider nvidia support on intel macs?
  2. #include <string.h>
  3. #include "gpu_info_cudart.h"
  4. void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp) {
  5. cudartReturn_t ret;
  6. resp->err = NULL;
  7. const int buflen = 256;
  8. char buf[buflen + 1];
  9. int i;
  10. struct lookup {
  11. char *s;
  12. void **p;
  13. } l[] = {
  14. {"cudaSetDevice", (void *)&resp->ch.cudaSetDevice},
  15. {"cudaDeviceSynchronize", (void *)&resp->ch.cudaDeviceSynchronize},
  16. {"cudaDeviceReset", (void *)&resp->ch.cudaDeviceReset},
  17. {"cudaMemGetInfo", (void *)&resp->ch.cudaMemGetInfo},
  18. {"cudaGetDeviceCount", (void *)&resp->ch.cudaGetDeviceCount},
  19. {"cudaDeviceGetAttribute", (void *)&resp->ch.cudaDeviceGetAttribute},
  20. {"cudaDriverGetVersion", (void *)&resp->ch.cudaDriverGetVersion},
  21. {NULL, NULL},
  22. };
  23. resp->ch.handle = LOAD_LIBRARY(cudart_lib_path, RTLD_LAZY);
  24. if (!resp->ch.handle) {
  25. char *msg = LOAD_ERR();
  26. LOG(resp->ch.verbose, "library %s load err: %s\n", cudart_lib_path, msg);
  27. snprintf(buf, buflen,
  28. "Unable to load %s library to query for Nvidia GPUs: %s",
  29. cudart_lib_path, msg);
  30. free(msg);
  31. resp->err = strdup(buf);
  32. return;
  33. }
  34. // TODO once we've squashed the remaining corner cases remove this log
  35. LOG(resp->ch.verbose, "wiring cudart library functions in %s\n", cudart_lib_path);
  36. for (i = 0; l[i].s != NULL; i++) {
  37. // TODO once we've squashed the remaining corner cases remove this log
  38. LOG(resp->ch.verbose, "dlsym: %s\n", l[i].s);
  39. *l[i].p = LOAD_SYMBOL(resp->ch.handle, l[i].s);
  40. if (!l[i].p) {
  41. char *msg = LOAD_ERR();
  42. LOG(resp->ch.verbose, "dlerr: %s\n", msg);
  43. UNLOAD_LIBRARY(resp->ch.handle);
  44. resp->ch.handle = NULL;
  45. snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s,
  46. msg);
  47. free(msg);
  48. resp->err = strdup(buf);
  49. return;
  50. }
  51. }
  52. ret = (*resp->ch.cudaSetDevice)(0);
  53. if (ret != CUDART_SUCCESS) {
  54. LOG(resp->ch.verbose, "cudaSetDevice err: %d\n", ret);
  55. UNLOAD_LIBRARY(resp->ch.handle);
  56. resp->ch.handle = NULL;
  57. if (ret == CUDA_ERROR_INSUFFICIENT_DRIVER) {
  58. resp->err = strdup("your nvidia driver is too old or missing, please upgrade to run ollama");
  59. return;
  60. }
  61. snprintf(buf, buflen, "cudart init failure: %d", ret);
  62. resp->err = strdup(buf);
  63. return;
  64. }
  65. int version = 0;
  66. cudartDriverVersion_t driverVersion;
  67. driverVersion.major = 0;
  68. driverVersion.minor = 0;
  69. // Report driver version if we're in verbose mode, ignore errors
  70. ret = (*resp->ch.cudaDriverGetVersion)(&version);
  71. if (ret != CUDART_SUCCESS) {
  72. LOG(resp->ch.verbose, "cudaDriverGetVersion failed: %d\n", ret);
  73. } else {
  74. driverVersion.major = version / 1000;
  75. driverVersion.minor = (version - (driverVersion.major * 1000)) / 10;
  76. LOG(resp->ch.verbose, "CUDA driver version: %d-%d\n", driverVersion.major, driverVersion.minor);
  77. }
  78. }
  79. void cudart_check_vram(cudart_handle_t h, mem_info_t *resp) {
  80. resp->err = NULL;
  81. cudartMemory_t memInfo = {0,0,0};
  82. cudartReturn_t ret;
  83. const int buflen = 256;
  84. char buf[buflen + 1];
  85. int i;
  86. if (h.handle == NULL) {
  87. resp->err = strdup("cudart handle isn't initialized");
  88. return;
  89. }
  90. // cudaGetDeviceCount takes int type, resp-> count is uint
  91. int deviceCount;
  92. ret = (*h.cudaGetDeviceCount)(&deviceCount);
  93. if (ret != CUDART_SUCCESS) {
  94. snprintf(buf, buflen, "unable to get device count: %d", ret);
  95. resp->err = strdup(buf);
  96. return;
  97. } else {
  98. resp->count = (unsigned int)deviceCount;
  99. }
  100. resp->total = 0;
  101. resp->free = 0;
  102. for (i = 0; i < resp-> count; i++) {
  103. ret = (*h.cudaSetDevice)(i);
  104. if (ret != CUDART_SUCCESS) {
  105. snprintf(buf, buflen, "cudart device failed to initialize");
  106. resp->err = strdup(buf);
  107. return;
  108. }
  109. ret = (*h.cudaMemGetInfo)(&memInfo.free, &memInfo.total);
  110. if (ret != CUDART_SUCCESS) {
  111. snprintf(buf, buflen, "cudart device memory info lookup failure %d", ret);
  112. resp->err = strdup(buf);
  113. return;
  114. }
  115. LOG(h.verbose, "[%d] CUDA totalMem %lu\n", i, memInfo.total);
  116. LOG(h.verbose, "[%d] CUDA freeMem %lu\n", i, memInfo.free);
  117. resp->total += memInfo.total;
  118. resp->free += memInfo.free;
  119. }
  120. }
  121. void cudart_compute_capability(cudart_handle_t h, cudart_compute_capability_t *resp) {
  122. resp->err = NULL;
  123. resp->major = 0;
  124. resp->minor = 0;
  125. int major = 0;
  126. int minor = 0;
  127. cudartReturn_t ret;
  128. const int buflen = 256;
  129. char buf[buflen + 1];
  130. int i;
  131. if (h.handle == NULL) {
  132. resp->err = strdup("cudart handle not initialized");
  133. return;
  134. }
  135. int devices;
  136. ret = (*h.cudaGetDeviceCount)(&devices);
  137. if (ret != CUDART_SUCCESS) {
  138. snprintf(buf, buflen, "unable to get cudart device count: %d", ret);
  139. resp->err = strdup(buf);
  140. return;
  141. }
  142. for (i = 0; i < devices; i++) {
  143. ret = (*h.cudaSetDevice)(i);
  144. if (ret != CUDART_SUCCESS) {
  145. snprintf(buf, buflen, "cudart device failed to initialize");
  146. resp->err = strdup(buf);
  147. return;
  148. }
  149. ret = (*h.cudaDeviceGetAttribute)(&major, cudartDevAttrComputeCapabilityMajor, i);
  150. if (ret != CUDART_SUCCESS) {
  151. snprintf(buf, buflen, "device compute capability lookup failure %d: %d", i, ret);
  152. resp->err = strdup(buf);
  153. return;
  154. }
  155. ret = (*h.cudaDeviceGetAttribute)(&minor, cudartDevAttrComputeCapabilityMinor, i);
  156. if (ret != CUDART_SUCCESS) {
  157. snprintf(buf, buflen, "device compute capability lookup failure %d: %d", i, ret);
  158. resp->err = strdup(buf);
  159. return;
  160. }
  161. // Report the lowest major.minor we detect as that limits our compatibility
  162. if (resp->major == 0 || resp->major > major ) {
  163. resp->major = major;
  164. resp->minor = minor;
  165. } else if ( resp->major == major && resp->minor > minor ) {
  166. resp->minor = minor;
  167. }
  168. }
  169. }
  170. void cudart_release(cudart_handle_t h) {
  171. LOG(h.verbose, "releasing cudart library\n");
  172. UNLOAD_LIBRARY(h.handle);
  173. h.handle = NULL;
  174. }
  175. #endif // __APPLE__