// gguf.h
  // This file contains functionality related to "GGUF" files, the binary file format used by ggml.
  // GGUF files have the following structure:
  //
  // 1. File magic "GGUF" (4 bytes).
  // 2. File version (uint32_t).
  // 3. Number of ggml tensors in file (int64_t).
  // 4. Number of key-value-pairs in file (int64_t).
  // 5. For each KV pair:
  //   1. The key (string).
  //   2. The value type (gguf_type).
  //   3a. If the value type is GGUF_TYPE_ARRAY:
  //     1. The type of the array (gguf_type).
  //     2. The number of elements in the array (uint64_t).
  //     3. The binary representation of each element in the array.
  //   3b. Otherwise:
  //     1. The binary representation of the value.
  // 6. For each ggml tensor:
  //   1. The tensor name (string).
  //   2. The number of dimensions of the tensor (uint32_t).
  //   3. For each dimension:
  //     1. The size of the tensor in the dimension (int64_t).
  //   4. The tensor data type (ggml_type).
  //   5. The tensor data offset in the tensor data binary blob (uint64_t).
  // 7. The tensor data binary blob (optional, aligned).
  //
  // Strings are serialized as the string length (uint64_t) followed by the C string without the null terminator.
  // All enums are stored as int32_t.
  // All bool values are stored as int8_t.
  // If the special key "general.alignment" (uint32_t) is defined it is used for alignment,
  //   otherwise GGUF_DEFAULT_ALIGNMENT is used.
  //
  // Module maintainer: Johannes Gäßler (@JohannesGaessler, johannesg@5d6.de)
  33. #pragma once
  34. #include "ggml.h"
  35. #include <stdbool.h>
  36. #include <stdint.h>
  // File magic found at the start of every GGUF file (4 bytes).
  #define GGUF_MAGIC   "GGUF"

  // GGUF file format version (serialized as uint32_t right after the magic).
  // NOTE(review): presumably the version this library writes - confirm against the implementation.
  #define GGUF_VERSION 3

  // Special KV key (uint32_t value): if it is defined in a file it is used for alignment.
  #define GGUF_KEY_GENERAL_ALIGNMENT "general.alignment"

  // Alignment that is used when GGUF_KEY_GENERAL_ALIGNMENT is not defined.
  #define GGUF_DEFAULT_ALIGNMENT 32

  // Give all declarations below C linkage when this header is included from C++.
  #ifdef __cplusplus
  extern "C" {
  #endif
  // Types that can be stored as GGUF KV data.
  // Enums are stored in the file as int32_t, which is why every enumerator carries an explicit value.
  enum gguf_type {
      GGUF_TYPE_UINT8   = 0,
      GGUF_TYPE_INT8    = 1,
      GGUF_TYPE_UINT16  = 2,
      GGUF_TYPE_INT16   = 3,
      GGUF_TYPE_UINT32  = 4,
      GGUF_TYPE_INT32   = 5,
      GGUF_TYPE_FLOAT32 = 6,
      GGUF_TYPE_BOOL    = 7,  // stored as int8_t
      GGUF_TYPE_STRING  = 8,  // string length (uint64_t) followed by the characters, no null terminator
      GGUF_TYPE_ARRAY   = 9,  // element type (gguf_type), element count (uint64_t), then the elements
      GGUF_TYPE_UINT64  = 10,
      GGUF_TYPE_INT64   = 11,
      GGUF_TYPE_FLOAT64 = 12,
      GGUF_TYPE_COUNT,        // marks the end of the enum
  };
  // Opaque handle to the contents of a GGUF file; only ever used through pointers in this API.
  struct gguf_context;

  // Options for gguf_init_from_file.
  struct gguf_init_params {
      // NOTE(review): presumably suppresses allocation of the tensor data - confirm against the implementation
      bool no_alloc;

      // if not NULL, create a ggml_context and allocate the tensor data in it
      struct ggml_context ** ctx;
  };
  67. GGML_API struct gguf_context * gguf_init_empty(void);
  68. GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
  69. //GGML_API struct gguf_context * gguf_init_from_buffer(..);
  70. GGML_API void gguf_free(struct gguf_context * ctx);
  71. GGML_API const char * gguf_type_name(enum gguf_type type);
  72. GGML_API uint32_t gguf_get_version (const struct gguf_context * ctx);
  73. GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx);
  74. GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
  75. GGML_API int64_t gguf_get_n_kv(const struct gguf_context * ctx);
  76. GGML_API int64_t gguf_find_key(const struct gguf_context * ctx, const char * key); // returns -1 if key is not found
  77. GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int64_t key_id);
  78. GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int64_t key_id);
  79. GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int64_t key_id);
  80. // will abort if the wrong type is used for the key
  81. GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int64_t key_id);
  82. GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int64_t key_id);
  83. GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int64_t key_id);
  84. GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int64_t key_id);
  85. GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int64_t key_id);
  86. GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int64_t key_id);
  87. GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int64_t key_id);
  88. GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int64_t key_id);
  89. GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int64_t key_id);
  90. GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int64_t key_id);
  91. GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int64_t key_id);
  92. GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int64_t key_id);
  93. GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int64_t key_id);
  94. GGML_API size_t gguf_get_arr_n (const struct gguf_context * ctx, int64_t key_id);
  95. // get raw pointer to the first element of the array with the given key_id
  96. // for bool arrays, note that they are always stored as int8 on all platforms (usually this makes no difference)
  97. GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int64_t key_id);
  98. GGML_API size_t gguf_get_arr_data_n(const struct gguf_context * ctx, int64_t key_id);
  99. // get ith C string from array with given key_id
  100. GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int64_t key_id, size_t i);
  101. GGML_API int64_t gguf_get_n_tensors (const struct gguf_context * ctx);
  102. GGML_API int64_t gguf_find_tensor (const struct gguf_context * ctx, const char * name); // returns -1 if the tensor is not found
  103. GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int64_t tensor_id);
  104. GGML_API const char * gguf_get_tensor_name (const struct gguf_context * ctx, int64_t tensor_id);
  105. GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int64_t tensor_id);
  106. GGML_API size_t gguf_get_tensor_size (const struct gguf_context * ctx, int64_t tensor_id);
  107. // removes key if it exists, returns id that the key had prior to removal (-1 if it didn't exist)
  108. GGML_API int64_t gguf_remove_key(struct gguf_context * ctx, const char * key);
  109. // overrides an existing KV pair or adds a new one, the new KV pair is always at the back
  110. GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
  111. GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
  112. GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
  113. GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val);
  114. GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
  115. GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
  116. GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
  117. GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
  118. GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
  119. GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
  120. GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
  121. GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
  122. // creates a new array with n elements of the given type and copies the corresponding number of bytes from data
  123. GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, size_t n);
  124. // creates a new array with n strings and copies the corresponding strings from data
  125. GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, size_t n);
  126. // set or add KV pairs from another context
  127. GGML_API void gguf_set_kv(struct gguf_context * ctx, const struct gguf_context * src);
  128. // add tensor to GGUF context, tensor name must be unique
  129. GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
  130. // after changing a tensor's type, the offsets of all tensors with higher indices are immediately recalculated
  131. // in such a way that the tensor data remains as one contiguous block (except for padding)
  132. GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
  133. // assumes that at least gguf_get_tensor_size bytes can be read from data
  134. GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data);
  // writing gguf files can be done in 3 ways:
  //
  // - write the entire gguf_context to a binary file in a single pass:
  //
  //   gguf_write_to_file(ctx, fname, /*only_meta =*/ false);
  //
  // - write only the meta data to a file, then re-open the file and append the tensor data:
  //
  //   gguf_write_to_file(ctx, fname, /*only_meta =*/ true);
  //   FILE * f = fopen(fname, "ab");
  //   fwrite(..., f); // write tensor data
  //   fclose(f);
  //
  // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
  //
  //   FILE * f = fopen(fname, "wb");
  //   const size_t size_meta = gguf_get_meta_size(ctx);
  //   fseek(f, size_meta, SEEK_SET);
  //   fwrite(..., f); // write tensor data
  //   void * data = malloc(size_meta);
  //   gguf_get_meta_data(ctx, data);
  //   rewind(f);
  //   fwrite(data, 1, size_meta, f);
  //   free(data);
  //   fclose(f);
  //
  161. // write the entire context to a binary file
  162. GGML_API bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
  163. // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
  164. GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
  165. // writes the meta data to pointer "data"
  166. GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
  167. #ifdef __cplusplus
  168. }
  169. #endif