0002-34B-model-support.patch

From 6145068a6613c37bb43a7408b5496524bdcfc402 Mon Sep 17 00:00:00 2001
From: Bruce MacDonald <brucewmacdonald@gmail.com>
Date: Mon, 28 Aug 2023 18:08:53 -0400
Subject: [PATCH] 34B model support

---
 llama.cpp | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/llama.cpp b/llama.cpp
index f2cbe76..62c5cdf 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -79,6 +79,7 @@ enum e_model {
     MODEL_7B,
     MODEL_13B,
     MODEL_30B,
+    MODEL_34B,
     MODEL_65B,
     MODEL_70B,
 };
@@ -122,6 +123,7 @@ static std::map<e_model, size_t> MEM_REQ_SCRATCH0(int n_ctx)
         { MODEL_7B,  ((size_t) n_ctx / 16ull + 100ull) * MB },
         { MODEL_13B, ((size_t) n_ctx / 12ull + 120ull) * MB },
         { MODEL_30B, ((size_t) n_ctx /  9ull + 160ull) * MB },
+        { MODEL_34B, ((size_t) n_ctx /  9ull + 160ull) * MB },
         { MODEL_65B, ((size_t) n_ctx /  6ull + 256ull) * MB }, // guess
         { MODEL_70B, ((size_t) n_ctx /  7ull + 164ull) * MB },
     };
@@ -135,6 +137,7 @@ static const std::map<e_model, size_t> & MEM_REQ_SCRATCH1()
         { MODEL_7B,  160ull * MB },
         { MODEL_13B, 192ull * MB },
         { MODEL_30B, 256ull * MB },
+        { MODEL_34B, 256ull * MB },
         { MODEL_65B, 384ull * MB }, // guess
         { MODEL_70B, 304ull * MB },
     };
@@ -149,6 +152,7 @@ static const std::map<e_model, size_t> & MEM_REQ_EVAL()
         { MODEL_7B,  10ull * MB },
         { MODEL_13B, 12ull * MB },
         { MODEL_30B, 16ull * MB },
+        { MODEL_34B, 16ull * MB },
         { MODEL_65B, 24ull * MB }, // guess
         { MODEL_70B, 24ull * MB },
     };
@@ -164,6 +168,7 @@ static const std::map<e_model, size_t> & VRAM_REQ_SCRATCH_BASE()
         { MODEL_7B,  512ull * kB },
         { MODEL_13B, 640ull * kB },
         { MODEL_30B, 768ull * kB },
+        { MODEL_34B, 768ull * kB },
         { MODEL_65B, 1280ull * kB },
         { MODEL_70B, 1280ull * kB },
     };
@@ -179,6 +184,7 @@ static const std::map<e_model, size_t> & VRAM_REQ_SCRATCH_PER_CONTEXT()
         { MODEL_7B,  128ull },
         { MODEL_13B, 160ull },
         { MODEL_30B, 208ull },
+        { MODEL_34B, 208ull },
         { MODEL_65B, 256ull },
         { MODEL_70B, 256ull },
     };
@@ -1027,6 +1033,7 @@ static const char * llama_model_type_name(e_model type) {
         case MODEL_7B: return "7B";
         case MODEL_13B: return "13B";
         case MODEL_30B: return "30B";
+        case MODEL_34B: return "34B";
         case MODEL_65B: return "65B";
         case MODEL_70B: return "70B";
         default: LLAMA_ASSERT(false);
@@ -1074,6 +1081,7 @@ static void llama_model_load_internal(
             case 26: model.type = e_model::MODEL_3B; break;
             case 32: model.type = e_model::MODEL_7B; break;
             case 40: model.type = e_model::MODEL_13B; break;
+            case 48: model.type = e_model::MODEL_34B; break;
             case 60: model.type = e_model::MODEL_30B; break;
             case 80: model.type = e_model::MODEL_65B; break;
             default:
@@ -1094,6 +1102,8 @@ static void llama_model_load_internal(
             LLAMA_LOG_WARN("%s: warning: assuming 70B model based on GQA == %d\n", __func__, n_gqa);
             model.type = e_model::MODEL_70B;
             hparams.f_ffn_mult = 1.3f; // from the params.json of the 70B model
+        } else if (model.type == e_model::MODEL_34B && n_gqa == 8) {
+            hparams.f_ffn_mult = 1.0f; // from the params.json of the 34B model
         }

         hparams.rope_freq_base = rope_freq_base;
--
2.39.2 (Apple Git-143)
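
Note on the scratch-buffer hunks: the new MODEL_34B rows reuse the MODEL_30B heuristics unchanged, the two models being close in size. As a worked example, at n_ctx = 2048 the MEM_REQ_SCRATCH0 entry evaluates to (2048 / 9 + 160) MB = 387 MB (integer division), MEM_REQ_SCRATCH1 adds a flat 256 MB, and MEM_REQ_EVAL a flat 16 MB.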
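
For reference, a minimal standalone sketch of the detection logic that the last two hunks extend. It is not part of the patch, and the scaffolding around the llama.cpp identifiers (e_model, n_gqa, f_ffn_mult) is a simplified stand-in for the real loader. Code Llama 34B reports 48 transformer layers and grouped-query attention with n_gqa == 8, which is what the new case label and else-if branch key on:

#include <cstdint>
#include <cstdio>

enum e_model {
    MODEL_UNKNOWN,
    MODEL_3B,
    MODEL_7B,
    MODEL_13B,
    MODEL_30B,
    MODEL_34B, // added by this patch
    MODEL_65B,
    MODEL_70B,
};

// Mirrors the n_layer switch in llama_model_load_internal after this patch:
// 48-layer checkpoints previously fell through to the unknown-model default.
static e_model model_type_from_layers(uint32_t n_layer) {
    switch (n_layer) {
        case 26: return MODEL_3B;
        case 32: return MODEL_7B;
        case 40: return MODEL_13B;
        case 48: return MODEL_34B; // new in this patch
        case 60: return MODEL_30B;
        case 80: return MODEL_65B; // 65B and 70B share n_layer; n_gqa disambiguates
        default: return MODEL_UNKNOWN;
    }
}

int main() {
    uint32_t n_layer = 48;
    uint32_t n_gqa   = 8;    // grouped-query attention factor: n_head / n_head_kv
    float f_ffn_mult = 0.0f; // placeholder default for this sketch

    e_model type = model_type_from_layers(n_layer);

    // GQA fix-up, as in the final hunk above.
    if (type == MODEL_65B && n_gqa == 8) {
        type = MODEL_70B;
        f_ffn_mult = 1.3f; // from the params.json of the 70B model
    } else if (type == MODEL_34B && n_gqa == 8) {
        f_ffn_mult = 1.0f; // from the params.json of the 34B model
    }

    std::printf("model type id: %d, f_ffn_mult: %.1f\n", (int) type, f_ffn_mult);
    return 0;
}

Compiled alone, this prints "model type id: 5, f_ffn_mult: 1.0": the 48-layer checkpoint resolves to MODEL_34B instead of falling through to the unknown-model default.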