llama-adapter.h

/**
 * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#pragma once

#include "llama-impl.h"
#include "llama-hparams.h"

#include "ggml-cpp.h"

#include <unordered_map>
#include <vector>
//
// llama_adapter_cvec
//

// TODO: rename to llama_adapter_cvec
struct llama_control_vector {
    std::vector<ggml_context_ptr>        ctxs;
    std::vector<ggml_backend_buffer_ptr> bufs;

    std::vector<struct ggml_tensor *> tensors; // per layer

    int32_t layer_start = -1;
    int32_t layer_end   = -1;

    struct ggml_tensor * tensor_for(int il) const;

    struct ggml_tensor * apply_to(struct ggml_context * ctx, struct ggml_tensor * cur, int il) const;
};
int32_t llama_control_vector_apply(
        struct llama_control_vector & cvec,
        const llama_model & model,
        const float * data,
        size_t len,
        int32_t n_embd,
        int32_t il_start,
        int32_t il_end);
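
// Illustrative sketch (not part of this header; the loader name and layer
// bounds are hypothetical). `data` is assumed to hold `n_embd` floats per
// layer, concatenated in layer order, with a return value of 0 on success:
//
//     std::vector<float> dirs = load_direction_data();  // hypothetical loader
//     int32_t err = llama_control_vector_apply(
//             cvec, model,
//             dirs.data(), dirs.size(),
//             model.hparams.n_embd, /*il_start=*/10, /*il_end=*/20);
//
// During graph build, the per-layer delta is then added to the hidden state:
//
//     cur = cvec.apply_to(ctx, cur, il);  // adds tensors[il] when il is in range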
//
// llama_adapter_lora
//

// TODO: rename to llama_adapter_lora_weight
struct llama_lora_weight {
    struct ggml_tensor * a = nullptr;
    struct ggml_tensor * b = nullptr;

    llama_lora_weight() = default;
    llama_lora_weight(struct ggml_tensor * a, struct ggml_tensor * b) : a(a), b(b) {}
};
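
// Illustrative note (an assumption from the standard LoRA formulation, not
// stated in this header): `a` and `b` hold the low-rank factors of a weight
// delta, delta_W = b * a, so the adapted projection becomes
// W*x + scale * (b * (a * x)), with scale typically alpha / rank.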
// TODO: rename to llama_adapter_lora
struct llama_lora_adapter {
    // map tensor name to lora_a_b
    std::unordered_map<std::string, struct llama_lora_weight> ab_map;

    std::vector<ggml_context_ptr>        ctxs;
    std::vector<ggml_backend_buffer_ptr> bufs;

    float alpha; // lora_alpha scaling parameter read from the adapter file

    llama_lora_adapter() = default;
    ~llama_lora_adapter() = default;

    llama_lora_weight * get_weight(struct ggml_tensor * w);
};
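
// Illustrative sketch (hypothetical caller, not part of this header): how the
// adapter might be consulted while building the compute graph. `model_tensor`,
// `cur`, `ctx`, and `rank` are assumptions; the ggml calls are real.
//
//     llama_lora_weight * lw = adapter.get_weight(model_tensor);
//     if (lw != nullptr) {
//         // delta = b * (a * cur), scaled by alpha / rank (see note above)
//         struct ggml_tensor * ax    = ggml_mul_mat(ctx, lw->a, cur);
//         struct ggml_tensor * bax   = ggml_mul_mat(ctx, lw->b, ax);
//         const float          scale = adapter.alpha / rank;
//         cur = ggml_add(ctx, cur, ggml_scale(ctx, bax, scale));
//     }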