Explorar el Código

Debug logging for nvcuda init (#7532)

Some users are reporting crashes during nvcuda.dll initialization
on Windows. This should help narrow down where things are going wrong.
Daniel Hiltgen hace 5 meses
padre
commit
b111aa5a91
Se ha modificado 1 fichero con 7 adiciones y 0 borrados
  1. 7 0
      discover/gpu_info_nvcuda.c

+ 7 - 0
discover/gpu_info_nvcuda.c

@@ -4,6 +4,7 @@
 #include "gpu_info_nvcuda.h"
 #include "gpu_info_nvcuda.h"
 
 
 void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
 void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
+  LOG(resp->ch.verbose, "initializing %s\n", nvcuda_lib_path);
   CUresult ret;
   CUresult ret;
   resp->err = NULL;
   resp->err = NULL;
   resp->num_devices = 0;
   resp->num_devices = 0;
@@ -57,8 +58,10 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
       resp->cudaErr = -1;
       resp->cudaErr = -1;
       return;
       return;
     }
     }
+    LOG(resp->ch.verbose, "dlsym: %s - %p\n", l[i].s, *l[i].p);
   }
   }
 
 
+  LOG(resp->ch.verbose, "calling cuInit\n");
   ret = (*resp->ch.cuInit)(0);
   ret = (*resp->ch.cuInit)(0);
   if (ret != CUDA_SUCCESS) {
   if (ret != CUDA_SUCCESS) {
     LOG(resp->ch.verbose, "cuInit err: %d\n", ret);
     LOG(resp->ch.verbose, "cuInit err: %d\n", ret);
@@ -75,15 +78,18 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
   resp->ch.driver_minor = 0;
   resp->ch.driver_minor = 0;
 
 
   // Report driver version if we're in verbose mode, ignore errors
   // Report driver version if we're in verbose mode, ignore errors
+  LOG(resp->ch.verbose, "calling cuDriverGetVersion\n");
   ret = (*resp->ch.cuDriverGetVersion)(&version);
   ret = (*resp->ch.cuDriverGetVersion)(&version);
   if (ret != CUDA_SUCCESS) {
   if (ret != CUDA_SUCCESS) {
     LOG(resp->ch.verbose, "cuDriverGetVersion failed: %d\n", ret);
     LOG(resp->ch.verbose, "cuDriverGetVersion failed: %d\n", ret);
   } else {
   } else {
+    LOG(resp->ch.verbose, "raw version 0x%x\n", version);
     resp->ch.driver_major = version / 1000;
     resp->ch.driver_major = version / 1000;
     resp->ch.driver_minor = (version - (resp->ch.driver_major * 1000)) / 10;
     resp->ch.driver_minor = (version - (resp->ch.driver_major * 1000)) / 10;
     LOG(resp->ch.verbose, "CUDA driver version: %d.%d\n", resp->ch.driver_major, resp->ch.driver_minor);
     LOG(resp->ch.verbose, "CUDA driver version: %d.%d\n", resp->ch.driver_major, resp->ch.driver_minor);
   }
   }
 
 
+  LOG(resp->ch.verbose, "calling cuDeviceGetCount\n");
   ret = (*resp->ch.cuDeviceGetCount)(&resp->num_devices);
   ret = (*resp->ch.cuDeviceGetCount)(&resp->num_devices);
   if (ret != CUDA_SUCCESS) {
   if (ret != CUDA_SUCCESS) {
     LOG(resp->ch.verbose, "cuDeviceGetCount err: %d\n", ret);
     LOG(resp->ch.verbose, "cuDeviceGetCount err: %d\n", ret);
@@ -94,6 +100,7 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
     resp->cudaErr = ret;
     resp->cudaErr = ret;
     return;
     return;
   }
   }
+  LOG(resp->ch.verbose, "device count %d\n", resp->num_devices);
 }
 }
 
 
 const int buflen = 256;
 const int buflen = 256;