- /**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
- #include "llama-chat.h"
- #include "llama.h"
- #include <map>
- #include <sstream>
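- // LU8 wraps UTF-8 string literals: under C++20, where u8"..." has type const char8_t[],
- // the literal is cast back to const char * so it can be used with std::string and iostreams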
- #if __cplusplus >= 202000L
- #define LU8(x) (const char*)(u8##x)
- #else
- #define LU8(x) u8##x
- #endif
- // trim whitespace from the beginning and end of a string
- static std::string trim(const std::string & str) {
- size_t start = 0;
- size_t end = str.size();
- // cast to unsigned char so isspace() is well-defined for non-ASCII bytes
- while (start < end && isspace(static_cast<unsigned char>(str[start]))) {
- start += 1;
- }
- while (end > start && isspace(static_cast<unsigned char>(str[end - 1]))) {
- end -= 1;
- }
- return str.substr(start, end - start);
- }
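- // known template names; exposed through llama_chat_builtin_templates() and
- // accepted by name in llm_chat_detect_template() before any heuristic matching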
- static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
- { "chatml", LLM_CHAT_TEMPLATE_CHATML },
- { "llama2", LLM_CHAT_TEMPLATE_LLAMA_2 },
- { "llama2-sys", LLM_CHAT_TEMPLATE_LLAMA_2_SYS },
- { "llama2-sys-bos", LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS },
- { "llama2-sys-strip", LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP },
- { "mistral-v1", LLM_CHAT_TEMPLATE_MISTRAL_V1 },
- { "mistral-v3", LLM_CHAT_TEMPLATE_MISTRAL_V3 },
- { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN },
- { "mistral-v7", LLM_CHAT_TEMPLATE_MISTRAL_V7 },
- { "phi3", LLM_CHAT_TEMPLATE_PHI_3 },
- { "falcon3", LLM_CHAT_TEMPLATE_FALCON_3 },
- { "zephyr", LLM_CHAT_TEMPLATE_ZEPHYR },
- { "monarch", LLM_CHAT_TEMPLATE_MONARCH },
- { "gemma", LLM_CHAT_TEMPLATE_GEMMA },
- { "orion", LLM_CHAT_TEMPLATE_ORION },
- { "openchat", LLM_CHAT_TEMPLATE_OPENCHAT },
- { "vicuna", LLM_CHAT_TEMPLATE_VICUNA },
- { "vicuna-orca", LLM_CHAT_TEMPLATE_VICUNA_ORCA },
- { "deepseek", LLM_CHAT_TEMPLATE_DEEPSEEK },
- { "deepseek2", LLM_CHAT_TEMPLATE_DEEPSEEK_2 },
- { "deepseek3", LLM_CHAT_TEMPLATE_DEEPSEEK_3 },
- { "command-r", LLM_CHAT_TEMPLATE_COMMAND_R },
- { "llama3", LLM_CHAT_TEMPLATE_LLAMA_3 },
- { "chatglm3", LLM_CHAT_TEMPLATE_CHATGML_3 },
- { "chatglm4", LLM_CHAT_TEMPLATE_CHATGML_4 },
- { "minicpm", LLM_CHAT_TEMPLATE_MINICPM },
- { "exaone3", LLM_CHAT_TEMPLATE_EXAONE_3 },
- { "rwkv-world", LLM_CHAT_TEMPLATE_RWKV_WORLD },
- { "granite", LLM_CHAT_TEMPLATE_GRANITE },
- { "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT },
- { "megrez", LLM_CHAT_TEMPLATE_MEGREZ },
- };
- llm_chat_template llm_chat_template_from_str(const std::string & name) {
- return LLM_CHAT_TEMPLATES.at(name);
- }
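- // detect a template from its Jinja source (or a registered name) using substring heuristics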
- llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
- try {
- return llm_chat_template_from_str(tmpl);
- } catch (const std::out_of_range &) {
- // ignore
- }
- auto tmpl_contains = [&tmpl](const char * haystack) -> bool {
- return tmpl.find(haystack) != std::string::npos;
- };
- if (tmpl_contains("<|im_start|>")) {
- return LLM_CHAT_TEMPLATE_CHATML;
- } else if (tmpl.find("mistral") == 0 || tmpl_contains("[INST]")) {
- if (tmpl_contains("[SYSTEM_PROMPT]")) {
- return LLM_CHAT_TEMPLATE_MISTRAL_V7;
- } else if (
- // catches official 'v1' template
- tmpl_contains("' [INST] ' + system_message")
- // catches official 'v3' and 'v3-tekken' templates
- || tmpl_contains("[AVAILABLE_TOOLS]")
- ) {
- // Official mistral 'v1', 'v3' and 'v3-tekken' templates
- // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
- // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
- if (tmpl_contains(" [INST]")) {
- return LLM_CHAT_TEMPLATE_MISTRAL_V1;
- } else if (tmpl_contains("\"[INST]\"")) {
- return LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN;
- }
- return LLM_CHAT_TEMPLATE_MISTRAL_V3;
- } else {
- // llama2 template and its variants
- // [variant] support system message
- // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
- bool support_system_message = tmpl_contains("<<SYS>>");
- bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]");
- bool strip_message = tmpl_contains("content.strip()");
- if (strip_message) {
- return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
- } else if (add_bos_inside_history) {
- return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
- } else if (support_system_message) {
- return LLM_CHAT_TEMPLATE_LLAMA_2_SYS;
- } else {
- return LLM_CHAT_TEMPLATE_LLAMA_2;
- }
- }
- } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
- return LLM_CHAT_TEMPLATE_PHI_3;
- } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
- return LLM_CHAT_TEMPLATE_FALCON_3;
- } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) {
- return LLM_CHAT_TEMPLATE_ZEPHYR;
- } else if (tmpl_contains("bos_token + message['role']")) {
- return LLM_CHAT_TEMPLATE_MONARCH;
- } else if (tmpl_contains("<start_of_turn>")) {
- return LLM_CHAT_TEMPLATE_GEMMA;
- } else if (tmpl_contains("'\\n\\nAssistant: ' + eos_token")) {
- // OrionStarAI/Orion-14B-Chat
- return LLM_CHAT_TEMPLATE_ORION;
- } else if (tmpl_contains("GPT4 Correct ")) {
- // openchat/openchat-3.5-0106
- return LLM_CHAT_TEMPLATE_OPENCHAT;
- } else if (tmpl_contains("USER: ") && tmpl_contains("ASSISTANT: ")) {
- // eachadea/vicuna-13b-1.1 (and Orca variant)
- if (tmpl_contains("SYSTEM: ")) {
- return LLM_CHAT_TEMPLATE_VICUNA_ORCA;
- }
- return LLM_CHAT_TEMPLATE_VICUNA;
- } else if (tmpl_contains("### Instruction:") && tmpl_contains("<|EOT|>")) {
- // deepseek-ai/deepseek-coder-33b-instruct
- return LLM_CHAT_TEMPLATE_DEEPSEEK;
- } else if (tmpl_contains("<|START_OF_TURN_TOKEN|>") && tmpl_contains("<|USER_TOKEN|>")) {
- // CohereForAI/c4ai-command-r-plus
- return LLM_CHAT_TEMPLATE_COMMAND_R;
- } else if (tmpl_contains("<|start_header_id|>") && tmpl_contains("<|end_header_id|>")) {
- return LLM_CHAT_TEMPLATE_LLAMA_3;
- } else if (tmpl_contains("[gMASK]sop")) {
- // chatglm3-6b
- return LLM_CHAT_TEMPLATE_CHATGML_3;
- } else if (tmpl_contains("[gMASK]<sop>")) {
- return LLM_CHAT_TEMPLATE_CHATGML_4;
- } else if (tmpl_contains(LU8("<用户>"))) {
- // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
- return LLM_CHAT_TEMPLATE_MINICPM;
- } else if (tmpl_contains("'Assistant: ' + message['content'] + eos_token")) {
- return LLM_CHAT_TEMPLATE_DEEPSEEK_2;
- } else if (tmpl_contains(LU8("'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'"))) {
- return LLM_CHAT_TEMPLATE_DEEPSEEK_3;
- } else if (tmpl_contains("[|system|]") && tmpl_contains("[|assistant|]") && tmpl_contains("[|endofturn|]")) {
- // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
- // EXAONE-3.0-7.8B-Instruct
- return LLM_CHAT_TEMPLATE_EXAONE_3;
- } else if (tmpl_contains("rwkv-world")) {
- return LLM_CHAT_TEMPLATE_RWKV_WORLD;
- } else if (tmpl_contains("<|start_of_role|>")) {
- return LLM_CHAT_TEMPLATE_GRANITE;
- } else if (tmpl_contains("message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1]")) {
- return LLM_CHAT_TEMPLATE_GIGACHAT;
- } else if (tmpl_contains("<|role_start|>")) {
- return LLM_CHAT_TEMPLATE_MEGREZ;
- }
- return LLM_CHAT_TEMPLATE_UNKNOWN;
- }
- // Simple version of "llama_apply_chat_template" that only works with strings
- // This function uses heuristic checks to determine the commonly used template. It is not a jinja parser.
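- // on success the formatted prompt is written to dest and its length is returned; -1 means the template is not supported
- //
- // illustrative usage sketch (not part of the original file), assuming llama_chat_message from llama.h:
- //   llama_chat_message msg = { "user", "Hello" };
- //   std::vector<const llama_chat_message *> chat = { &msg };
- //   std::string prompt;
- //   llm_chat_apply_template(LLM_CHAT_TEMPLATE_CHATML, chat, prompt, /*add_ass=*/true);
- //   // prompt == "<|im_start|>user\nHello<|im_end|>\n<|im_start|>assistant\n"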
- int32_t llm_chat_apply_template(
- llm_chat_template tmpl,
- const std::vector<const llama_chat_message *> & chat,
- std::string & dest, bool add_ass) {
- // Taken from the research: https://github.com/ggerganov/llama.cpp/issues/5527
- std::stringstream ss;
- if (tmpl == LLM_CHAT_TEMPLATE_CHATML) {
- // chatml template
- for (auto message : chat) {
- ss << "<|im_start|>" << message->role << "\n" << message->content << "<|im_end|>\n";
- }
- if (add_ass) {
- ss << "<|im_start|>assistant\n";
- }
- } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7) {
- // Official mistral 'v7' template
- // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7
- for (auto message : chat) {
- std::string role(message->role);
- std::string content(message->content);
- if (role == "system") {
- ss << "[SYSTEM_PROMPT] " << content << "[/SYSTEM_PROMPT]";
- } else if (role == "user") {
- ss << "[INST] " << content << "[/INST]";
- }
- else {
- ss << " " << content << "</s>";
- }
- }
- } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1
- || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3
- || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN) {
- // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
- // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
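- // v1 adds a space before [INST] and [/INST]; v3-tekken drops the space after [INST]; only v3 trims the assistant message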
- std::string leading_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1 ? " " : "";
- std::string trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN ? "" : " ";
- bool trim_assistant_message = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3;
- bool is_inside_turn = false;
- for (auto message : chat) {
- if (!is_inside_turn) {
- ss << leading_space << "[INST]" << trailing_space;
- is_inside_turn = true;
- }
- std::string role(message->role);
- std::string content(message->content);
- if (role == "system") {
- ss << content << "\n\n";
- } else if (role == "user") {
- ss << content << leading_space << "[/INST]";
- } else {
- ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>";
- is_inside_turn = false;
- }
- }
- } else if (
- tmpl == LLM_CHAT_TEMPLATE_LLAMA_2
- || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS
- || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS
- || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP) {
- // llama2 template and its variants
- // [variant] support system message
- // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
- bool support_system_message = tmpl != LLM_CHAT_TEMPLATE_LLAMA_2;
- // [variant] add BOS inside history
- bool add_bos_inside_history = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
- // [variant] trim spaces from the input message
- bool strip_message = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
- // construct the prompt
- bool is_inside_turn = true; // skip BOS at the beginning
- ss << "[INST] ";
- for (auto message : chat) {
- std::string content = strip_message ? trim(message->content) : message->content;
- std::string role(message->role);
- if (!is_inside_turn) {
- is_inside_turn = true;
- ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] ");
- }
- if (role == "system") {
- if (support_system_message) {
- ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n";
- } else {
- // if the model does not support system message, we still include it in the first message, but without <<SYS>>
- ss << content << "\n";
- }
- } else if (role == "user") {
- ss << content << " [/INST]";
- } else {
- ss << content << "</s>";
- is_inside_turn = false;
- }
- }
- } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_3) {
- // Phi 3
- for (auto message : chat) {
- std::string role(message->role);
- ss << "<|" << role << "|>\n" << message->content << "<|end|>\n";
- }
- if (add_ass) {
- ss << "<|assistant|>\n";
- }
- } else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) {
- // Falcon 3
- for (auto message : chat) {
- std::string role(message->role);
- ss << "<|" << role << "|>\n" << message->content << "\n";
- }
- if (add_ass) {
- ss << "<|assistant|>\n";
- }
- } else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) {
- // zephyr template
- for (auto message : chat) {
- ss << "<|" << message->role << "|>" << "\n" << message->content << "<|endoftext|>\n";
- }
- if (add_ass) {
- ss << "<|assistant|>\n";
- }
- } else if (tmpl == LLM_CHAT_TEMPLATE_MONARCH) {
- // mlabonne/AlphaMonarch-7B template (the <s> is included inside history)
- for (auto message : chat) {
- std::string bos = (message == chat.front()) ? "" : "<s>"; // skip BOS for first message
- ss << bos << message->role << "\n" << message->content << "</s>\n";
- }
- if (add_ass) {
- ss << "<s>assistant\n";
- }
- } else if (tmpl == LLM_CHAT_TEMPLATE_GEMMA) {
- // google/gemma-7b-it
- std::string system_prompt = "";
- for (auto message : chat) {
- std::string role(message->role);
- if (role == "system") {
- // there is no system message for gemma, but we will merge it with user prompt, so nothing is broken
- system_prompt = trim(message->content);
- continue;
- }
- // in gemma, "assistant" is "model"
- role = role == "assistant" ? "model" : message->role;
- ss << "<start_of_turn>" << role << "\n";
- if (!system_prompt.empty() && role != "model") {
- ss << system_prompt << "\n\n";
- system_prompt = "";
- }
- ss << trim(message->content) << "<end_of_turn>\n";
- }
- if (add_ass) {
- ss << "<start_of_turn>model\n";
- }
- } else if (tmpl == LLM_CHAT_TEMPLATE_ORION) {
- // OrionStarAI/Orion-14B-Chat
- std::string system_prompt = "";
- for (auto message : chat) {
- std::string role(message->role);
- if (role == "system") {
- // there is no system message support, we will merge it with user prompt
- system_prompt = message->content;
- continue;
- } else if (role == "user") {
- ss << "Human: ";
- if (!system_prompt.empty()) {
- ss << system_prompt << "\n\n";
- system_prompt = "";
- }
- ss << message->content << "\n\nAssistant: </s>";
- } else {
- ss << message->content << "</s>";
- }
- }
- } else if (tmpl == LLM_CHAT_TEMPLATE_OPENCHAT) {
- // openchat/openchat-3.5-0106,
- for (auto message : chat) {
- std::string role(message->role);
- if (role == "system") {
- ss << message->content << "<|end_of_turn|>";
- } else {
- role[0] = toupper(role[0]);
- ss << "GPT4 Correct " << role << ": " << message->content << "<|end_of_turn|>";
- }
- }
- if (add_ass) {
- ss << "GPT4 Correct Assistant:";
- }
- } else if (tmpl == LLM_CHAT_TEMPLATE_VICUNA || tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
- // eachadea/vicuna-13b-1.1 (and Orca variant)
- for (auto message : chat) {
- std::string role(message->role);
- if (role == "system") {
- // Orca-Vicuna variant uses a system prefix
- if (tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
- ss << "SYSTEM: " << message->content << "\n";
- } else {
- ss << message->content << "\n\n";
- }
- } else if (role == "user") {
- ss << "USER: " << message->content << "\n";
- } else if (role == "assistant") {
- ss << "ASSISTANT: " << message->content << "</s>\n";
- }
- }
- if (add_ass) {
- ss << "ASSISTANT:";
- }
- } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK) {
- // deepseek-ai/deepseek-coder-33b-instruct
- for (auto message : chat) {
- std::string role(message->role);
- if (role == "system") {
- ss << message->content;
- } else if (role == "user") {
- ss << "### Instruction:\n" << message->content << "\n";
- } else if (role == "assistant") {
- ss << "### Response:\n" << message->content << "\n<|EOT|>\n";
- }
- }
- if (add_ass) {
- ss << "### Response:\n";
- }
- } else if (tmpl == LLM_CHAT_TEMPLATE_COMMAND_R) {
- // CohereForAI/c4ai-command-r-plus
- for (auto message : chat) {
- std::string role(message->role);
- if (role == "system") {
- ss << "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
- } else if (role == "user") {
- ss << "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
- } else if (role == "assistant") {
- ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
- }
- }
- if (add_ass) {
- ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>";
- }
- } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA_3) {
- // Llama 3
- for (auto message : chat) {
- std::string role(message->role);
- ss << "<|start_header_id|>" << role << "<|end_header_id|>\n\n" << trim(message->content) << "<|eot_id|>";
- }
- if (add_ass) {
- ss << "<|start_header_id|>assistant<|end_header_id|>\n\n";
- }
- } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGML_3) {
- // chatglm3-6b
- ss << "[gMASK]" << "sop";
- for (auto message : chat) {
- std::string role(message->role);
- ss << "<|" << role << "|>" << "\n " << message->content;
- }
- if (add_ass) {
- ss << "<|assistant|>";
- }
- } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGML_4) {
- ss << "[gMASK]" << "<sop>";
- for (auto message : chat) {
- std::string role(message->role);
- ss << "<|" << role << "|>" << "\n" << message->content;
- }
- if (add_ass) {
- ss << "<|assistant|>";
- }
- } else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) {
- // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
- for (auto message : chat) {
- std::string role(message->role);
- if (role == "user") {
- ss << LU8("<用户>");
- ss << trim(message->content);
- ss << "<AI>";
- } else {
- ss << trim(message->content);
- }
- }
- } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_2) {
- // DeepSeek-V2
- for (auto message : chat) {
- std::string role(message->role);
- if (role == "system") {
- ss << message->content << "\n\n";
- } else if (role == "user") {
- ss << "User: " << message->content << "\n\n";
- } else if (role == "assistant") {
- ss << "Assistant: " << message->content << LU8("<|end▁of▁sentence|>");
- }
- }
- if (add_ass) {
- ss << "Assistant:";
- }
- } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_3) {
- // DeepSeek-V3
- for (auto message : chat) {
- std::string role(message->role);
- if (role == "system") {
- ss << message->content << "\n\n";
- } else if (role == "user") {
- ss << LU8("<|User|>") << message->content;
- } else if (role == "assistant") {
- ss << LU8("<|Assistant|>") << message->content << LU8("<|end▁of▁sentence|>");
- }
- }
- if (add_ass) {
- ss << LU8("<|Assistant|>");
- }
- } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) {
- // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
- // EXAONE-3.0-7.8B-Instruct
- for (auto message : chat) {
- std::string role(message->role);
- if (role == "system") {
- ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n";
- } else if (role == "user") {
- ss << "[|user|]" << trim(message->content) << "\n";
- } else if (role == "assistant") {
- ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n";
- }
- }
- if (add_ass) {
- ss << "[|assistant|]";
- }
- } else if (tmpl == LLM_CHAT_TEMPLATE_RWKV_WORLD) {
- // this template requires the model to have "\n\n" as EOT token
- for (auto message : chat) {
- std::string role(message->role);
- if (role == "user") {
- ss << "User: " << message->content << "\n\nAssistant:";
- } else {
- ss << message->content << "\n\n";
- }
- }
- } else if (tmpl == LLM_CHAT_TEMPLATE_GRANITE) {
- // IBM Granite template
- for (const auto & message : chat) {
- std::string role(message->role);
- ss << "<|start_of_role|>" << role << "<|end_of_role|>";
- if (role == "assistant_tool_call") {
- ss << "<|tool_call|>";
- }
- ss << message->content << "<|end_of_text|>\n";
- }
- if (add_ass) {
- ss << "<|start_of_role|>assistant<|end_of_role|>\n";
- }
- } else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) {
- // GigaChat template
- bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";
- // Handle system message if present
- if (has_system) {
- ss << "<s>" << chat[0]->content << "<|message_sep|>";
- } else {
- ss << "<s>";
- }
- // Process remaining messages
- for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) {
- std::string role(chat[i]->role);
- if (role == "user") {
- ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>"
- << "available functions<|role_sep|>[]<|message_sep|>";
- } else if (role == "assistant") {
- ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>";
- }
- }
- // Add generation prompt if needed
- if (add_ass) {
- ss << "assistant<|role_sep|>";
- }
- } else if (tmpl == LLM_CHAT_TEMPLATE_MEGREZ) {
- // Megrez template
- for (auto message : chat) {
- std::string role(message->role);
- ss << "<|role_start|>" << role << "<|role_end|>" << message->content << "<|turn_end|>";
- }
- if (add_ass) {
- ss << "<|role_start|>assistant<|role_end|>";
- }
- } else {
- // template not supported
- return -1;
- }
- dest = ss.str();
- return dest.size();
- }
- // public interface
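- // copies up to len built-in template names into output and returns the total number available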
- int32_t llama_chat_builtin_templates(const char ** output, size_t len) {
- auto it = LLM_CHAT_TEMPLATES.begin();
- for (size_t i = 0; i < std::min(len, LLM_CHAT_TEMPLATES.size()); i++) {
- output[i] = it->first.c_str();
- std::advance(it, 1);
- }
- return (int32_t) LLM_CHAT_TEMPLATES.size();
- }