// ggml-backend-impl.h
  1. #pragma once
  2. // ggml-backend internal header
  3. #include "ggml-backend.h"
  4. #ifdef __cplusplus
  5. extern "C" {
  6. #endif
  7. //
  8. // Backend buffer
  9. //
  10. // buffer type
  11. typedef void * ggml_backend_buffer_type_context_t;
  12. struct ggml_backend_buffer_type_i {
  13. const char * (*GGML_CALL get_name) (ggml_backend_buffer_type_t buft);
  14. ggml_backend_buffer_t (*GGML_CALL alloc_buffer) (ggml_backend_buffer_type_t buft, size_t size);
  15. size_t (*GGML_CALL get_alignment) (ggml_backend_buffer_type_t buft); // tensor alignment
  16. size_t (*GGML_CALL get_max_size) (ggml_backend_buffer_type_t buft); // allocation max size
  17. size_t (*GGML_CALL get_alloc_size) (ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor); // data size needed to allocate the tensor, including padding
  18. bool (*GGML_CALL supports_backend)(ggml_backend_buffer_type_t buft, ggml_backend_t backend); // check if the buffer type is usable by the backend
  19. // check if tensor data is in host memory
  20. // should be equivalent to supports_backend(buft, ggml_backend_cpu_init())
  21. bool (*GGML_CALL is_host) (ggml_backend_buffer_type_t buft);
  22. };
  23. struct ggml_backend_buffer_type {
  24. struct ggml_backend_buffer_type_i iface;
  25. ggml_backend_buffer_type_context_t context;
  26. };
  27. // buffer
  28. typedef void * ggml_backend_buffer_context_t;
  29. struct ggml_backend_buffer_i {
  30. const char * (*GGML_CALL get_name) (ggml_backend_buffer_t buffer);
  31. void (*GGML_CALL free_buffer)(ggml_backend_buffer_t buffer);
  32. void * (*GGML_CALL get_base) (ggml_backend_buffer_t buffer);
  33. void (*GGML_CALL init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
  34. void (*GGML_CALL set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
  35. void (*GGML_CALL get_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
  36. bool (*GGML_CALL cpy_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst); // dst is in the buffer, src may be in any buffer
  37. void (*GGML_CALL clear) (ggml_backend_buffer_t buffer, uint8_t value);
  38. void (*GGML_CALL reset) (ggml_backend_buffer_t buffer); // reset any internal state due to tensor initialization, such as tensor extras
  39. };
  40. struct ggml_backend_buffer {
  41. struct ggml_backend_buffer_i iface;
  42. ggml_backend_buffer_type_t buft;
  43. ggml_backend_buffer_context_t context;
  44. size_t size;
  45. enum ggml_backend_buffer_usage usage;
  46. };
  47. GGML_CALL ggml_backend_buffer_t ggml_backend_buffer_init(
  48. ggml_backend_buffer_type_t buft,
  49. struct ggml_backend_buffer_i iface,
  50. ggml_backend_buffer_context_t context,
  51. size_t size);
  // do not use directly, use ggml_backend_tensor_copy instead
  // Takes a read-only source tensor and a writable destination tensor; returns a bool
  // (presumably whether the copy was performed - confirm in the implementation).
  bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);
  54. // buffer that contains a collection of buffers
  55. GGML_CALL ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
  56. GGML_CALL bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
  57. GGML_CALL void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
  58. //
  59. // Backend
  60. //
  61. typedef void * ggml_backend_context_t;
  62. struct ggml_backend_i {
  63. const char * (*GGML_CALL get_name)(ggml_backend_t backend);
  64. void (*GGML_CALL free)(ggml_backend_t backend);
  65. // buffer allocation
  66. ggml_backend_buffer_type_t (*GGML_CALL get_default_buffer_type)(ggml_backend_t backend);
  67. // (optional) asynchronous tensor data access
  68. void (*GGML_CALL set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
  69. void (*GGML_CALL get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
  70. bool (*GGML_CALL cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst);
  71. // (optional) complete all pending operations
  72. void (*GGML_CALL synchronize)(ggml_backend_t backend);
  73. // compute graph with a plan (not used currently)
  74. ggml_backend_graph_plan_t (*GGML_CALL graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
  75. void (*GGML_CALL graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
  76. // compute graph with a plan
  77. enum ggml_status (*GGML_CALL graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
  78. // compute graph without a plan (async)
  79. enum ggml_status (*GGML_CALL graph_compute) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
  80. // check if the backend supports an operation
  81. bool (*GGML_CALL supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
  82. // check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer
  83. // these should be expensive operations with large batch sizes that may benefit from running on this backend
  84. // even if the weight has to be copied from the CPU temporarily
  85. bool (*GGML_CALL offload_op)(ggml_backend_t backend, const struct ggml_tensor * op);
  86. // (optional) event synchronization
  87. ggml_backend_event_t (*GGML_CALL event_new) (ggml_backend_t backend);
  88. void (*GGML_CALL event_free) (ggml_backend_event_t event);
  89. void (*GGML_CALL event_record) (ggml_backend_event_t event);
  90. void (*GGML_CALL event_wait) (ggml_backend_t backend, ggml_backend_event_t event);
  91. void (*GGML_CALL event_synchronize) (ggml_backend_event_t event);
  92. };
  93. struct ggml_backend {
  94. ggml_guid_t guid;
  95. struct ggml_backend_i iface;
  96. ggml_backend_context_t context;
  97. };
  98. struct ggml_backend_event {
  99. ggml_backend_t backend;
  100. void * context;
  101. };
  102. //
  103. // Backend registry
  104. //
  105. typedef ggml_backend_t (*GGML_CALL ggml_backend_init_fn)(const char * params, void * user_data);
  106. GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data);
  // Close the extern "C" block opened at the top of this header.
  #ifdef __cplusplus
  }
  #endif