// MIT License
// Copyright (c) 2023 go-skynet authors
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
  18. #ifdef __cplusplus
  19. #include <string>
  20. #include <vector>
  21. extern "C" {
  22. #endif
  23. #include <stdbool.h>
  24. extern unsigned char tokenCallback(void *, char *);
  25. int load_state(void *ctx, char *statefile, char *modes);
  26. int eval(void *params_ptr, void *ctx, char *text);
  27. void save_state(void *ctx, char *dst, char *modes);
  28. void *load_model(const char *fname, int n_ctx, int n_seed, bool memory_f16,
  29. bool mlock, bool embeddings, bool mmap, bool low_vram,
  30. bool vocab_only, int n_gpu, int n_batch, const char *maingpu,
  31. const char *tensorsplit, bool numa);
  32. int get_embeddings(void *params_ptr, void *state_pr, float *res_embeddings);
  33. int get_token_embeddings(void *params_ptr, void *state_pr, int *tokens,
  34. int tokenSize, float *res_embeddings);
  35. void *llama_allocate_params(
  36. const char *prompt, int seed, int threads, int tokens, int top_k,
  37. float top_p, float temp, float repeat_penalty, int repeat_last_n,
  38. bool ignore_eos, bool memory_f16, int n_batch, int n_keep,
  39. const char **antiprompt, int antiprompt_count, float tfs_z, float typical_p,
  40. float frequency_penalty, float presence_penalty, int mirostat,
  41. float mirostat_eta, float mirostat_tau, bool penalize_nl,
  42. const char *logit_bias, const char *session_file, bool prompt_cache_all,
  43. bool mlock, bool mmap, const char *maingpu, const char *tensorsplit,
  44. bool prompt_cache_ro);
  45. void llama_free_params(void *params_ptr);
  46. void llama_binding_free_model(void *state);
  47. int llama_predict(void *params_ptr, void *state_pr, char *result, bool debug);
  48. #ifdef __cplusplus
  49. }
  50. std::vector<std::string> create_vector(const char **strings, int count);
  51. void delete_vector(std::vector<std::string> *vec);
  52. #endif