0001-cuda.patch 2.8 KB

  1. From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
  2. From: jmorganca <jmorganca@gmail.com>
  3. Date: Thu, 6 Jun 2024 23:55:47 -0700
  4. Subject: [PATCH] cuda
  5. ---
  6. ggml/include/ggml-cuda.h | 2 ++
  7. ggml/src/ggml-backend.c | 5 +++++
  8. ggml/src/ggml-cuda.cu | 6 ++++--
  9. 3 files changed, 11 insertions(+), 2 deletions(-)
  10. diff --git a/ggml/include/ggml-cuda.h b/ggml/include/ggml-cuda.h
  11. index 71bb6dcf..08be0895 100644
  12. --- a/ggml/include/ggml-cuda.h
  13. +++ b/ggml/include/ggml-cuda.h
  14. @@ -34,6 +34,8 @@ GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_typ
  15. // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
  16. GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
  17. +GGML_API GGML_CALL int ggml_backend_cuda_reg_devices();
  18. +
  19. GGML_API GGML_CALL int ggml_backend_cuda_get_device_count(void);
  20. GGML_API GGML_CALL void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
  21. GGML_API GGML_CALL void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
  22. diff --git a/ggml/src/ggml-backend.c b/ggml/src/ggml-backend.c
  23. index ba280e06..d5c3fe49 100644
  24. --- a/ggml/src/ggml-backend.c
  25. +++ b/ggml/src/ggml-backend.c
  26. @@ -83,7 +83,12 @@ void ggml_backend_buffer_free(ggml_backend_buffer_t buffer) {
  27. if (buffer->iface.free_buffer != NULL) {
  28. buffer->iface.free_buffer(buffer);
  29. }
  30. +
  31. +// TODO: this needs to be freed in cuda and hipblas backends because
  32. +// the cuda backend implementation is compiled with msvc
  33. +#if !defined(GGML_USE_CUDA) && !defined(GGML_USE_HIPBLAS)
  34. free(buffer);
  35. +#endif
  36. }
  37. size_t ggml_backend_buffer_get_size(ggml_backend_buffer_t buffer) {
  38. diff --git a/ggml/src/ggml-cuda.cu b/ggml/src/ggml-cuda.cu
  39. index 6efdab14..809d6ab1 100644
  40. --- a/ggml/src/ggml-cuda.cu
  41. +++ b/ggml/src/ggml-cuda.cu
  42. @@ -469,6 +469,10 @@ GGML_CALL static bool ggml_backend_buffer_is_cuda(ggml_backend_buffer_t buffer)
  43. GGML_CALL static void ggml_backend_cuda_buffer_free_buffer(ggml_backend_buffer_t buffer) {
  44. ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
  45. delete ctx;
  46. +
  47. + // TODO: this needs to be freed in cuda and hipblas backends because
  48. + // the cuda backend implementation is compiled with msvc
  49. + free(buffer);
  50. }
  51. GGML_CALL static void * ggml_backend_cuda_buffer_get_base(ggml_backend_buffer_t buffer) {
  52. @@ -3204,8 +3208,6 @@ GGML_CALL static ggml_backend_t ggml_backend_reg_cuda_init(const char * params,
  53. GGML_UNUSED(params);
  54. }
  55. -extern "C" GGML_CALL int ggml_backend_cuda_reg_devices();
  56. -
  57. GGML_CALL int ggml_backend_cuda_reg_devices() {
  58. int device_count = ggml_backend_cuda_get_device_count();
  59. //int device_count = 1; // DEBUG: some tools require delaying CUDA initialization