llama-chat.cpp
/**
 * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "llama-chat.h"

#include "llama.h"

#include <algorithm> // std::min
#include <cctype>    // isspace, toupper
#include <iterator>  // std::advance
#include <map>
#include <sstream>

// in C++20 a u8"..." literal has type const char8_t[], so it must be cast back to
// const char * before it can be streamed into std::string / std::stringstream
#if __cplusplus >= 202000L
#define LU8(x) (const char*)(u8##x)
#else
#define LU8(x) u8##x
#endif

// trim whitespace from the beginning and end of a string
static std::string trim(const std::string & str) {
    size_t start = 0;
    size_t end = str.size();
    while (start < end && isspace(str[start])) {
        start += 1;
    }
    while (end > start && isspace(str[end - 1])) {
        end -= 1;
    }
    return str.substr(start, end - start);
}

static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
    { "chatml",            LLM_CHAT_TEMPLATE_CHATML            },
    { "llama2",            LLM_CHAT_TEMPLATE_LLAMA_2           },
    { "llama2-sys",        LLM_CHAT_TEMPLATE_LLAMA_2_SYS       },
    { "llama2-sys-bos",    LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS   },
    { "llama2-sys-strip",  LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP },
    { "mistral-v1",        LLM_CHAT_TEMPLATE_MISTRAL_V1        },
    { "mistral-v3",        LLM_CHAT_TEMPLATE_MISTRAL_V3        },
    { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN },
    { "mistral-v7",        LLM_CHAT_TEMPLATE_MISTRAL_V7        },
    { "phi3",              LLM_CHAT_TEMPLATE_PHI_3             },
    { "falcon3",           LLM_CHAT_TEMPLATE_FALCON_3          },
    { "zephyr",            LLM_CHAT_TEMPLATE_ZEPHYR            },
    { "monarch",           LLM_CHAT_TEMPLATE_MONARCH           },
    { "gemma",             LLM_CHAT_TEMPLATE_GEMMA             },
    { "orion",             LLM_CHAT_TEMPLATE_ORION             },
    { "openchat",          LLM_CHAT_TEMPLATE_OPENCHAT          },
    { "vicuna",            LLM_CHAT_TEMPLATE_VICUNA            },
    { "vicuna-orca",       LLM_CHAT_TEMPLATE_VICUNA_ORCA       },
    { "deepseek",          LLM_CHAT_TEMPLATE_DEEPSEEK          },
    { "deepseek2",         LLM_CHAT_TEMPLATE_DEEPSEEK_2        },
    { "deepseek3",         LLM_CHAT_TEMPLATE_DEEPSEEK_3        },
    { "command-r",         LLM_CHAT_TEMPLATE_COMMAND_R         },
    { "llama3",            LLM_CHAT_TEMPLATE_LLAMA_3           },
    { "chatglm3",          LLM_CHAT_TEMPLATE_CHATGML_3         },
    { "chatglm4",          LLM_CHAT_TEMPLATE_CHATGML_4         },
    { "minicpm",           LLM_CHAT_TEMPLATE_MINICPM           },
    { "exaone3",           LLM_CHAT_TEMPLATE_EXAONE_3          },
    { "rwkv-world",        LLM_CHAT_TEMPLATE_RWKV_WORLD        },
    { "granite",           LLM_CHAT_TEMPLATE_GRANITE           },
    { "gigachat",          LLM_CHAT_TEMPLATE_GIGACHAT          },
    { "megrez",            LLM_CHAT_TEMPLATE_MEGREZ            },
};
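
// These short names let a caller pick a built-in template explicitly (for example via a
// `--chat-template` argument in the surrounding tooling) instead of relying on detection
// of the model's embedded Jinja template; they are resolved by llm_chat_template_from_str()
// and llm_chat_detect_template() below.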

llm_chat_template llm_chat_template_from_str(const std::string & name) {
    return LLM_CHAT_TEMPLATES.at(name);
}

llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
    try {
        return llm_chat_template_from_str(tmpl);
    } catch (const std::out_of_range &) {
        // ignore
    }

    auto tmpl_contains = [&tmpl](const char * haystack) -> bool {
        return tmpl.find(haystack) != std::string::npos;
    };

    if (tmpl_contains("<|im_start|>")) {
        return LLM_CHAT_TEMPLATE_CHATML;
    } else if (tmpl.find("mistral") == 0 || tmpl_contains("[INST]")) {
        if (tmpl_contains("[SYSTEM_PROMPT]")) {
            return LLM_CHAT_TEMPLATE_MISTRAL_V7;
        } else if (
            // catches official 'v1' template
            tmpl_contains("' [INST] ' + system_message")
            // catches official 'v3' and 'v3-tekken' templates
            || tmpl_contains("[AVAILABLE_TOOLS]")
        ) {
            // Official mistral 'v1', 'v3' and 'v3-tekken' templates
            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
            if (tmpl_contains(" [INST]")) {
                return LLM_CHAT_TEMPLATE_MISTRAL_V1;
            } else if (tmpl_contains("\"[INST]\"")) {
                return LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN;
            }
            return LLM_CHAT_TEMPLATE_MISTRAL_V3;
        } else {
            // llama2 template and its variants
            // [variant] support system message
            // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
            bool support_system_message = tmpl_contains("<<SYS>>");
            bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]");
            bool strip_message = tmpl_contains("content.strip()");
            if (strip_message) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
            } else if (add_bos_inside_history) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
            } else if (support_system_message) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS;
            } else {
                return LLM_CHAT_TEMPLATE_LLAMA_2;
            }
        }
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
        return LLM_CHAT_TEMPLATE_PHI_3;
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
        return LLM_CHAT_TEMPLATE_FALCON_3;
    } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) {
        return LLM_CHAT_TEMPLATE_ZEPHYR;
    } else if (tmpl_contains("bos_token + message['role']")) {
        return LLM_CHAT_TEMPLATE_MONARCH;
    } else if (tmpl_contains("<start_of_turn>")) {
        return LLM_CHAT_TEMPLATE_GEMMA;
    } else if (tmpl_contains("'\\n\\nAssistant: ' + eos_token")) {
        // OrionStarAI/Orion-14B-Chat
        return LLM_CHAT_TEMPLATE_ORION;
    } else if (tmpl_contains("GPT4 Correct ")) {
        // openchat/openchat-3.5-0106
        return LLM_CHAT_TEMPLATE_OPENCHAT;
    } else if (tmpl_contains("USER: ") && tmpl_contains("ASSISTANT: ")) {
        // eachadea/vicuna-13b-1.1 (and Orca variant)
        if (tmpl_contains("SYSTEM: ")) {
            return LLM_CHAT_TEMPLATE_VICUNA_ORCA;
        }
        return LLM_CHAT_TEMPLATE_VICUNA;
    } else if (tmpl_contains("### Instruction:") && tmpl_contains("<|EOT|>")) {
        // deepseek-ai/deepseek-coder-33b-instruct
        return LLM_CHAT_TEMPLATE_DEEPSEEK;
    } else if (tmpl_contains("<|START_OF_TURN_TOKEN|>") && tmpl_contains("<|USER_TOKEN|>")) {
        // CohereForAI/c4ai-command-r-plus
        return LLM_CHAT_TEMPLATE_COMMAND_R;
    } else if (tmpl_contains("<|start_header_id|>") && tmpl_contains("<|end_header_id|>")) {
        return LLM_CHAT_TEMPLATE_LLAMA_3;
    } else if (tmpl_contains("[gMASK]sop")) {
        // chatglm3-6b
        return LLM_CHAT_TEMPLATE_CHATGML_3;
    } else if (tmpl_contains("[gMASK]<sop>")) {
        return LLM_CHAT_TEMPLATE_CHATGML_4;
    } else if (tmpl_contains(LU8("<用户>"))) {
        // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
        return LLM_CHAT_TEMPLATE_MINICPM;
    } else if (tmpl_contains("'Assistant: ' + message['content'] + eos_token")) {
        return LLM_CHAT_TEMPLATE_DEEPSEEK_2;
    } else if (tmpl_contains(LU8("'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'"))) {
        return LLM_CHAT_TEMPLATE_DEEPSEEK_3;
    } else if (tmpl_contains("[|system|]") && tmpl_contains("[|assistant|]") && tmpl_contains("[|endofturn|]")) {
        // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
        // EXAONE-3.0-7.8B-Instruct
        return LLM_CHAT_TEMPLATE_EXAONE_3;
    } else if (tmpl_contains("rwkv-world")) {
        return LLM_CHAT_TEMPLATE_RWKV_WORLD;
    } else if (tmpl_contains("<|start_of_role|>")) {
        return LLM_CHAT_TEMPLATE_GRANITE;
    } else if (tmpl_contains("message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1]")) {
        return LLM_CHAT_TEMPLATE_GIGACHAT;
    } else if (tmpl_contains("<|role_start|>")) {
        return LLM_CHAT_TEMPLATE_MEGREZ;
    }
    return LLM_CHAT_TEMPLATE_UNKNOWN;
}
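
// Illustrative usage sketch (hypothetical caller code, for the example only): a template
// can be selected either by its registered short name or by passing the Jinja source of a
// model's chat template, which is matched by the substring heuristics above, e.g.
//
//   llm_chat_template t1 = llm_chat_detect_template("chatml"); // exact name lookup
//   llm_chat_template t2 = llm_chat_detect_template(
//       "{% for m in messages %}<|im_start|>{{ m['role'] }}\n{{ m['content'] }}<|im_end|>\n{% endfor %}");
//   // t1 == t2 == LLM_CHAT_TEMPLATE_CHATML: the first call hits the name table,
//   // the second falls through to the "<|im_start|>" substring check.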

// Simple version of "llama_apply_chat_template" that only works with strings
// This function uses heuristic checks to determine commonly used templates. It is not a jinja parser.
int32_t llm_chat_apply_template(
    llm_chat_template tmpl,
    const std::vector<const llama_chat_message *> & chat,
    std::string & dest, bool add_ass) {
    // Taken from the research: https://github.com/ggerganov/llama.cpp/issues/5527
    std::stringstream ss;
    if (tmpl == LLM_CHAT_TEMPLATE_CHATML) {
        // chatml template
        for (auto message : chat) {
            ss << "<|im_start|>" << message->role << "\n" << message->content << "<|im_end|>\n";
        }
        if (add_ass) {
            ss << "<|im_start|>assistant\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7) {
        // Official mistral 'v7' template
        // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7
        for (auto message : chat) {
            std::string role(message->role);
            std::string content(message->content);
            if (role == "system") {
                ss << "[SYSTEM_PROMPT] " << content << "[/SYSTEM_PROMPT]";
            } else if (role == "user") {
                ss << "[INST] " << content << "[/INST]";
            } else {
                ss << " " << content << "</s>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1
            || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3
            || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN) {
        // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
        // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
        std::string leading_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1 ? " " : "";
        std::string trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN ? "" : " ";
        bool trim_assistant_message = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3;
        bool is_inside_turn = false;
        for (auto message : chat) {
            if (!is_inside_turn) {
                ss << leading_space << "[INST]" << trailing_space;
                is_inside_turn = true;
            }
            std::string role(message->role);
            std::string content(message->content);
            if (role == "system") {
                ss << content << "\n\n";
            } else if (role == "user") {
                ss << content << leading_space << "[/INST]";
            } else {
                ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>";
                is_inside_turn = false;
            }
        }
    } else if (
            tmpl == LLM_CHAT_TEMPLATE_LLAMA_2
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP) {
        // llama2 template and its variants
        // [variant] support system message
        // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
        bool support_system_message = tmpl != LLM_CHAT_TEMPLATE_LLAMA_2;
        // [variant] add BOS inside history
        bool add_bos_inside_history = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
        // [variant] trim spaces from the input message
        bool strip_message = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
        // construct the prompt
        bool is_inside_turn = true; // skip BOS at the beginning
        ss << "[INST] ";
        for (auto message : chat) {
            std::string content = strip_message ? trim(message->content) : message->content;
            std::string role(message->role);
            if (!is_inside_turn) {
                is_inside_turn = true;
                ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] ");
            }
            if (role == "system") {
                if (support_system_message) {
                    ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n";
                } else {
                    // if the model does not support system message, we still include it in the first message, but without <<SYS>>
                    ss << content << "\n";
                }
            } else if (role == "user") {
                ss << content << " [/INST]";
            } else {
                ss << content << "</s>";
                is_inside_turn = false;
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_3) {
        // Phi 3
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>\n" << message->content << "<|end|>\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) {
        // Falcon 3
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>\n" << message->content << "\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) {
        // zephyr template
        for (auto message : chat) {
            ss << "<|" << message->role << "|>" << "\n" << message->content << "<|endoftext|>\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MONARCH) {
        // mlabonne/AlphaMonarch-7B template (the <s> is included inside history)
        for (auto message : chat) {
            std::string bos = (message == chat.front()) ? "" : "<s>"; // skip BOS for first message
            ss << bos << message->role << "\n" << message->content << "</s>\n";
        }
        if (add_ass) {
            ss << "<s>assistant\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GEMMA) {
        // google/gemma-7b-it
        std::string system_prompt = "";
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // there is no system message for gemma, but we will merge it with user prompt, so nothing is broken
                system_prompt = trim(message->content);
                continue;
            }
            // in gemma, "assistant" is "model"
            role = role == "assistant" ? "model" : message->role;
            ss << "<start_of_turn>" << role << "\n";
            if (!system_prompt.empty() && role != "model") {
                ss << system_prompt << "\n\n";
                system_prompt = "";
            }
            ss << trim(message->content) << "<end_of_turn>\n";
        }
        if (add_ass) {
            ss << "<start_of_turn>model\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_ORION) {
        // OrionStarAI/Orion-14B-Chat
        std::string system_prompt = "";
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // there is no system message support, we will merge it with user prompt
                system_prompt = message->content;
                continue;
            } else if (role == "user") {
                ss << "Human: ";
                if (!system_prompt.empty()) {
                    ss << system_prompt << "\n\n";
                    system_prompt = "";
                }
                ss << message->content << "\n\nAssistant: </s>";
            } else {
                ss << message->content << "</s>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_OPENCHAT) {
        // openchat/openchat-3.5-0106
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "<|end_of_turn|>";
            } else {
                role[0] = toupper(role[0]);
                ss << "GPT4 Correct " << role << ": " << message->content << "<|end_of_turn|>";
            }
        }
        if (add_ass) {
            ss << "GPT4 Correct Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_VICUNA || tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
        // eachadea/vicuna-13b-1.1 (and Orca variant)
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // Orca-Vicuna variant uses a system prefix
                if (tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
                    ss << "SYSTEM: " << message->content << "\n";
                } else {
                    ss << message->content << "\n\n";
                }
            } else if (role == "user") {
                ss << "USER: " << message->content << "\n";
            } else if (role == "assistant") {
                ss << "ASSISTANT: " << message->content << "</s>\n";
            }
        }
        if (add_ass) {
            ss << "ASSISTANT:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK) {
        // deepseek-ai/deepseek-coder-33b-instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content;
            } else if (role == "user") {
                ss << "### Instruction:\n" << message->content << "\n";
            } else if (role == "assistant") {
                ss << "### Response:\n" << message->content << "\n<|EOT|>\n";
            }
        }
        if (add_ass) {
            ss << "### Response:\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_COMMAND_R) {
        // CohereForAI/c4ai-command-r-plus
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            } else if (role == "user") {
                ss << "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            } else if (role == "assistant") {
                ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            }
        }
        if (add_ass) {
            ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA_3) {
        // Llama 3
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|start_header_id|>" << role << "<|end_header_id|>\n\n" << trim(message->content) << "<|eot_id|>";
        }
        if (add_ass) {
            ss << "<|start_header_id|>assistant<|end_header_id|>\n\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGML_3) {
        // chatglm3-6b
        ss << "[gMASK]" << "sop";
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n " << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGML_4) {
        ss << "[gMASK]" << "<sop>";
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n" << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) {
        // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "user") {
                ss << LU8("<用户>");
                ss << trim(message->content);
                ss << "<AI>";
            } else {
                ss << trim(message->content);
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_2) {
        // DeepSeek-V2
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << "User: " << message->content << "\n\n";
            } else if (role == "assistant") {
                ss << "Assistant: " << message->content << LU8("<|end▁of▁sentence|>");
            }
        }
        if (add_ass) {
            ss << "Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_3) {
        // DeepSeek-V3
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << LU8("<|User|>") << message->content;
            } else if (role == "assistant") {
                ss << LU8("<|Assistant|>") << message->content << LU8("<|end▁of▁sentence|>");
            }
        }
        if (add_ass) {
            ss << LU8("<|Assistant|>");
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) {
        // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
        // EXAONE-3.0-7.8B-Instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n";
            } else if (role == "user") {
                ss << "[|user|]" << trim(message->content) << "\n";
            } else if (role == "assistant") {
                ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n";
            }
        }
        if (add_ass) {
            ss << "[|assistant|]";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_RWKV_WORLD) {
        // this template requires the model to have "\n\n" as EOT token
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "user") {
                ss << "User: " << message->content << "\n\nAssistant:";
            } else {
                ss << message->content << "\n\n";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GRANITE) {
        // IBM Granite template
        for (const auto & message : chat) {
            std::string role(message->role);
            ss << "<|start_of_role|>" << role << "<|end_of_role|>";
            if (role == "assistant_tool_call") {
                ss << "<|tool_call|>";
            }
            ss << message->content << "<|end_of_text|>\n";
        }
        if (add_ass) {
            ss << "<|start_of_role|>assistant<|end_of_role|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) {
        // GigaChat template
        bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";

        // Handle system message if present
        if (has_system) {
            ss << "<s>" << chat[0]->content << "<|message_sep|>";
        } else {
            ss << "<s>";
        }

        // Process remaining messages
        for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (role == "user") {
                ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>"
                   << "available functions<|role_sep|>[]<|message_sep|>";
            } else if (role == "assistant") {
                ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>";
            }
        }

        // Add generation prompt if needed
        if (add_ass) {
            ss << "assistant<|role_sep|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MEGREZ) {
        // Megrez template
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|role_start|>" << role << "<|role_end|>" << message->content << "<|turn_end|>";
        }
        if (add_ass) {
            ss << "<|role_start|>assistant<|role_end|>";
        }
    } else {
        // template not supported
        return -1;
    }
    dest = ss.str();
    return dest.size();
}
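
// Illustrative usage sketch (hypothetical caller code, for the example only): rendering a
// two-message conversation with the ChatML template and requesting an assistant prefix:
//
//   llama_chat_message msgs[2] = {
//       { "system", "You are a helpful assistant." },
//       { "user",   "Hello!" },
//   };
//   std::vector<const llama_chat_message *> chat = { &msgs[0], &msgs[1] };
//   std::string prompt;
//   int32_t res = llm_chat_apply_template(LLM_CHAT_TEMPLATE_CHATML, chat, prompt, /*add_ass=*/true);
//   // prompt == "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
//   //           "<|im_start|>user\nHello!<|im_end|>\n"
//   //           "<|im_start|>assistant\n"
//   // res is the length of the rendered prompt, or -1 for an unsupported template.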

// public interface

int32_t llama_chat_builtin_templates(const char ** output, size_t len) {
    auto it = LLM_CHAT_TEMPLATES.begin();
    for (size_t i = 0; i < std::min(len, LLM_CHAT_TEMPLATES.size()); i++) {
        output[i] = it->first.c_str();
        std::advance(it, 1);
    }
    return (int32_t) LLM_CHAT_TEMPLATES.size();
}
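
// Illustrative usage sketch (hypothetical caller code; the buffer size is an arbitrary
// choice for the example): enumerating the built-in template names exposed above:
//
//   const char * names[64];
//   int32_t n_total = llama_chat_builtin_templates(names, 64);
//   for (int32_t i = 0; i < n_total && i < 64; i++) {
//       printf("%s\n", names[i]); // prints the registered names in std::map (lexicographic) order
//   }
//   // the return value is the total number of built-in templates, which may exceed the
//   // number of entries actually written into `names`.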