/**
 * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#pragma once

// ggml-backend internal header

#include "ggml-backend.h"

#ifdef __cplusplus
extern "C" {
#endif

    #define GGML_BACKEND_API_VERSION 1

    //
    // Backend buffer type
    //

    struct ggml_backend_buffer_type_i {
        const char *          (*get_name)      (ggml_backend_buffer_type_t buft);
        // allocate a buffer of this type
        ggml_backend_buffer_t (*alloc_buffer)  (ggml_backend_buffer_type_t buft, size_t size);
        // tensor alignment
        size_t                (*get_alignment) (ggml_backend_buffer_type_t buft);
        // (optional) max buffer size that can be allocated (defaults to SIZE_MAX)
        size_t                (*get_max_size)  (ggml_backend_buffer_type_t buft);
        // (optional) data size needed to allocate the tensor, including padding (defaults to ggml_nbytes)
        size_t                (*get_alloc_size)(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor);
        // (optional) check if tensor data is in host memory and uses standard ggml tensor layout (defaults to false)
        bool                  (*is_host)       (ggml_backend_buffer_type_t buft);
    };

    struct ggml_backend_buffer_type {
        struct ggml_backend_buffer_type_i iface;
        ggml_backend_dev_t                device;
        void *                            context;
    };
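
    // Example (illustrative sketch, not part of this header's API): a minimal
    // buffer type for plain host memory. All "example_" names are hypothetical;
    // the matching alloc_buffer is sketched together with the buffer iface below.
    //
    //     static const char * example_buft_get_name(ggml_backend_buffer_type_t buft) {
    //         return "EXAMPLE";
    //     }
    //
    //     static size_t example_buft_get_alignment(ggml_backend_buffer_type_t buft) {
    //         return 32; // whatever alignment the device (or SIMD width) requires
    //     }
    //
    //     static bool example_buft_is_host(ggml_backend_buffer_type_t buft) {
    //         return true; // data is in system memory with standard ggml layout
    //     }
    //
    //     static const struct ggml_backend_buffer_type_i example_buft_iface = {
    //         /* .get_name       = */ example_buft_get_name,
    //         /* .alloc_buffer   = */ example_buft_alloc_buffer, // see sketch below
    //         /* .get_alignment  = */ example_buft_get_alignment,
    //         /* .get_max_size   = */ NULL, // optional, defaults to SIZE_MAX
    //         /* .get_alloc_size = */ NULL, // optional, defaults to ggml_nbytes
    //         /* .is_host        = */ example_buft_is_host,
    //     };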

    //
    // Backend buffer
    //

    struct ggml_backend_buffer_i {
        // (optional) free the buffer
        void   (*free_buffer)  (ggml_backend_buffer_t buffer);
        // base address of the buffer
        void * (*get_base)     (ggml_backend_buffer_t buffer);
        // (optional) initialize a tensor in the buffer (e.g. add tensor extras)
        void   (*init_tensor)  (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
        // tensor data access
        void   (*memset_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
        void   (*set_tensor)   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
        void   (*get_tensor)   (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
        // (optional) tensor copy: dst is in the buffer, src may be in any buffer, including buffers from a different backend (return false if not supported)
        bool   (*cpy_tensor)   (ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst);
        // clear the entire buffer
        void   (*clear)        (ggml_backend_buffer_t buffer, uint8_t value);
        // (optional) reset any internal state due to tensor initialization, such as tensor extras
        void   (*reset)        (ggml_backend_buffer_t buffer);
    };

    struct ggml_backend_buffer {
        struct ggml_backend_buffer_i   iface;
        ggml_backend_buffer_type_t     buft;
        void *                         context;
        size_t                         size;
        enum ggml_backend_buffer_usage usage;
    };

    GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
            ggml_backend_buffer_type_t   buft,
            struct ggml_backend_buffer_i iface,
            void *                       context,
            size_t                       size);
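
    // Example (illustrative sketch): a malloc-backed buffer iface plus the
    // example_buft_alloc_buffer referenced above. Requires <stdlib.h> and
    // <string.h>; all "example_" names are hypothetical.
    //
    //     static void example_buffer_free_buffer(ggml_backend_buffer_t buffer) {
    //         free(buffer->context); // context holds the allocation base
    //     }
    //
    //     static void * example_buffer_get_base(ggml_backend_buffer_t buffer) {
    //         return buffer->context;
    //     }
    //
    //     static void example_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,
    //             uint8_t value, size_t offset, size_t size) {
    //         memset((char *)tensor->data + offset, value, size);
    //     }
    //
    //     static void example_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,
    //             const void * data, size_t offset, size_t size) {
    //         memcpy((char *)tensor->data + offset, data, size);
    //     }
    //
    //     static void example_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor,
    //             void * data, size_t offset, size_t size) {
    //         memcpy(data, (const char *)tensor->data + offset, size);
    //     }
    //
    //     static void example_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
    //         memset(buffer->context, value, buffer->size);
    //     }
    //
    //     static const struct ggml_backend_buffer_i example_buffer_iface = {
    //         /* .free_buffer   = */ example_buffer_free_buffer,
    //         /* .get_base      = */ example_buffer_get_base,
    //         /* .init_tensor   = */ NULL, // optional
    //         /* .memset_tensor = */ example_buffer_memset_tensor,
    //         /* .set_tensor    = */ example_buffer_set_tensor,
    //         /* .get_tensor    = */ example_buffer_get_tensor,
    //         /* .cpy_tensor    = */ NULL, // optional
    //         /* .clear         = */ example_buffer_clear,
    //         /* .reset         = */ NULL, // optional
    //     };
    //
    //     static ggml_backend_buffer_t example_buft_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
    //         void * data = malloc(size);
    //         if (data == NULL) {
    //             return NULL;
    //         }
    //         return ggml_backend_buffer_init(buft, example_buffer_iface, data, size);
    //     }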

    // do not use directly, use ggml_backend_tensor_copy instead
    GGML_API bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);

    // multi-buffer
    // buffer that contains a collection of buffers
    GGML_API ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
    GGML_API bool                  ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
    GGML_API void                  ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
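
    // Example (usage sketch): group several already-allocated buffers under one
    // handle so they can be freed and tagged together, e.g. model weights split
    // across allocations. "bufs" and "n_bufs" are hypothetical.
    //
    //     ggml_backend_buffer_t combined = ggml_backend_multi_buffer_alloc_buffer(bufs, n_bufs);
    //     ggml_backend_multi_buffer_set_usage(combined, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);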

    //
    // Backend (stream)
    //

    struct ggml_backend_i {
        const char * (*get_name)(ggml_backend_t backend);

        void (*free)(ggml_backend_t backend);

        // (optional) asynchronous tensor data access
        void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
        void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
        bool (*cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst);

        // (optional) complete all pending operations (required if the backend supports async operations)
        void (*synchronize)(ggml_backend_t backend);

        // (optional) graph plans (not used currently)
        // compute graph with a plan
        ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
        void                      (*graph_plan_free)   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
        // update the plan with a new graph - this should be faster than creating a new plan when the graph has the same topology
        void                      (*graph_plan_update) (ggml_backend_t backend, ggml_backend_graph_plan_t plan, const struct ggml_cgraph * cgraph);
        // compute the graph with the plan
        enum ggml_status          (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);

        // compute graph (always async if supported by the backend)
        enum ggml_status          (*graph_compute)     (ggml_backend_t backend, struct ggml_cgraph * cgraph);

        // (optional) event synchronization
        // record an event on this stream
        void (*event_record)(ggml_backend_t backend, ggml_backend_event_t event);
        // wait for an event on a different stream
        void (*event_wait)  (ggml_backend_t backend, ggml_backend_event_t event);
    };

    struct ggml_backend {
        ggml_guid_t           guid;
        struct ggml_backend_i iface;
        ggml_backend_dev_t    device;
        void *                context;
    };
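
    // Example (illustrative sketch): a fully synchronous backend only needs
    // get_name, free and graph_compute; every optional member can be left NULL.
    // "example_graph_compute" and the name/free helpers are hypothetical.
    //
    //     static enum ggml_status example_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
    //         for (int i = 0; i < cgraph->n_nodes; i++) {
    //             // evaluate cgraph->nodes[i] on the device ...
    //         }
    //         return GGML_STATUS_SUCCESS;
    //     }
    //
    //     static const struct ggml_backend_i example_backend_iface = {
    //         /* .get_name           = */ example_backend_get_name,
    //         /* .free               = */ example_backend_free,
    //         /* .set_tensor_async   = */ NULL,
    //         /* .get_tensor_async   = */ NULL,
    //         /* .cpy_tensor_async   = */ NULL,
    //         /* .synchronize        = */ NULL, // required only with async ops
    //         /* .graph_plan_create  = */ NULL,
    //         /* .graph_plan_free    = */ NULL,
    //         /* .graph_plan_update  = */ NULL,
    //         /* .graph_plan_compute = */ NULL,
    //         /* .graph_compute      = */ example_graph_compute,
    //         /* .event_record       = */ NULL,
    //         /* .event_wait         = */ NULL,
    //     };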

    struct ggml_backend_event {
        struct ggml_backend_device * device;
        void * context;
    };
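
    // Example (usage sketch, via the public event API declared in ggml-backend.h):
    // ordering work between two streams with an event, assuming both backends
    // implement the optional event hooks. "backend_a", "backend_b" and "dev_a"
    // are hypothetical.
    //
    //     ggml_backend_event_t ev = ggml_backend_event_new(dev_a);
    //     ggml_backend_event_record(ev, backend_a); // record after A's submitted work
    //     ggml_backend_event_wait(backend_b, ev);   // B waits without blocking the host
    //     ggml_backend_event_free(ev);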

    //
    // Backend device
    //

    // Note: if additional properties are needed, we should add a struct with all of them
    //       the current functions to obtain the properties can remain, since they are more convenient for often used properties
    struct ggml_backend_device_i {
        // device name: short identifier for this device, such as "CPU" or "CUDA0"
        const char * (*get_name)(ggml_backend_dev_t dev);

        // device description: short informative description of the device, could be the model name
        const char * (*get_description)(ggml_backend_dev_t dev);

        // device memory in bytes
        void (*get_memory)(ggml_backend_dev_t dev, size_t * free, size_t * total);

        // device type
        enum ggml_backend_dev_type (*get_type)(ggml_backend_dev_t dev);

        // device properties
        void (*get_props)(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props);

        // backend (stream) initialization
        ggml_backend_t (*init_backend)(ggml_backend_dev_t dev, const char * params);

        // preferred buffer type
        ggml_backend_buffer_type_t (*get_buffer_type)(ggml_backend_dev_t dev);

        // (optional) host buffer type (in system memory, typically this is a pinned memory buffer for faster transfers between host and device)
        ggml_backend_buffer_type_t (*get_host_buffer_type)(ggml_backend_dev_t dev);

        // (optional) buffer from pointer: create a buffer from a host pointer (useful for memory mapped models and importing data from other libraries)
        ggml_backend_buffer_t (*buffer_from_host_ptr)(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size);

        // check if the backend can compute an operation
        bool (*supports_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);

        // check if the backend can use tensors allocated in a buffer type
        bool (*supports_buft)(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft);

        // (optional) check if the backend wants to run an operation, even if the weights are allocated in an incompatible buffer
        // these should be expensive operations that may benefit from running on this backend instead of the CPU backend
        bool (*offload_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);

        // (optional) event synchronization
        ggml_backend_event_t (*event_new)        (ggml_backend_dev_t dev);
        void                 (*event_free)       (ggml_backend_dev_t dev, ggml_backend_event_t event);
        void                 (*event_synchronize)(ggml_backend_dev_t dev, ggml_backend_event_t event);
    };

    struct ggml_backend_device {
        struct ggml_backend_device_i iface;
        ggml_backend_reg_t           reg;
        void *                       context;
    };
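
    // Example (illustrative sketch): get_props is typically built on top of the
    // other getters, with caps advertising which optional features the device
    // implements. Member names follow struct ggml_backend_dev_props as declared
    // in ggml-backend.h; the "example_dev_*" helpers are hypothetical.
    //
    //     static void example_dev_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
    //         props->name        = example_dev_get_name(dev);
    //         props->description = example_dev_get_description(dev);
    //         props->type        = example_dev_get_type(dev);
    //         example_dev_get_memory(dev, &props->memory_free, &props->memory_total);
    //         props->caps = (struct ggml_backend_dev_caps) {
    //             /* .async                = */ false,
    //             /* .host_buffer          = */ false,
    //             /* .buffer_from_host_ptr = */ false,
    //             /* .events               = */ false,
    //         };
    //     }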

    //
    // Backend (reg)
    //

    struct ggml_backend_reg_i {
        const char * (*get_name)(ggml_backend_reg_t reg);

        // enumerate available devices
        size_t             (*get_device_count)(ggml_backend_reg_t reg);
        ggml_backend_dev_t (*get_device)      (ggml_backend_reg_t reg, size_t index);

        // (optional) get a pointer to a function in the backend
        // backends can add custom functions that are not part of the standard ggml-backend interface
        void * (*get_proc_address)(ggml_backend_reg_t reg, const char * name);
    };

    struct ggml_backend_reg {
        int api_version; // initialize to GGML_BACKEND_API_VERSION
        struct ggml_backend_reg_i iface;
        void * context;
    };

    // Internal backend registry API
    GGML_API void ggml_backend_register(ggml_backend_reg_t reg);
    GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
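
    // Example (illustrative sketch): a backend exposes one registry object with
    // api_version set to GGML_BACKEND_API_VERSION. "example_reg_*" and
    // "ggml_backend_example_reg" are hypothetical names.
    //
    //     static struct ggml_backend_reg example_reg = {
    //         /* .api_version = */ GGML_BACKEND_API_VERSION,
    //         /* .iface       = */ {
    //             /* .get_name         = */ example_reg_get_name,
    //             /* .get_device_count = */ example_reg_get_device_count,
    //             /* .get_device       = */ example_reg_get_device,
    //             /* .get_proc_address = */ NULL, // optional
    //         },
    //         /* .context     = */ NULL,
    //     };
    //
    //     ggml_backend_reg_t ggml_backend_example_reg(void) {
    //         return &example_reg;
    //     }
    //
    //     // for a statically linked backend, register once at startup:
    //     //     ggml_backend_register(ggml_backend_example_reg());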

    // Add backend dynamic loading support to the backend

    // Initialize the backend
    typedef ggml_backend_reg_t (*ggml_backend_init_t)(void);
    // Optional: obtain a score for the backend based on the system configuration
    // Higher scores are preferred, 0 means the backend is not supported in the current system
    typedef int (*ggml_backend_score_t)(void);

#ifdef GGML_BACKEND_DL
#    ifdef __cplusplus
#        define GGML_BACKEND_DL_IMPL(reg_fn)                                  \
             extern "C" {                                                     \
                 GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \
             }                                                                \
             ggml_backend_reg_t ggml_backend_init(void) {                     \
                 return reg_fn();                                             \
             }
#        define GGML_BACKEND_DL_SCORE_IMPL(score_fn)                          \
             extern "C" {                                                     \
                 GGML_BACKEND_API int ggml_backend_score(void);               \
             }                                                                \
             int ggml_backend_score(void) {                                   \
                 return score_fn();                                           \
             }
#    else
#        define GGML_BACKEND_DL_IMPL(reg_fn)                                  \
             GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void);     \
             ggml_backend_reg_t ggml_backend_init(void) {                     \
                 return reg_fn();                                             \
             }
#        define GGML_BACKEND_DL_SCORE_IMPL(score_fn)                          \
             GGML_BACKEND_API int ggml_backend_score(void);                   \
             int ggml_backend_score(void) {                                   \
                 return score_fn();                                           \
             }
#    endif
#else
#    define GGML_BACKEND_DL_IMPL(reg_fn)
#    define GGML_BACKEND_DL_SCORE_IMPL(score_fn)
#endif
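
// Example (usage sketch): a dynamically loadable backend expands these macros
// once in its implementation file so the loader can resolve ggml_backend_init
// and (optionally) ggml_backend_score by name. "ggml_backend_example_reg" and
// "ggml_backend_example_score" are hypothetical functions matching the
// ggml_backend_init_t / ggml_backend_score_t signatures above.
//
//     GGML_BACKEND_DL_IMPL(ggml_backend_example_reg)
//     GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_example_score)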

#ifdef __cplusplus
}
#endif