Browse Source

Update llama.cpp submodule to `a8db2a9c` (#5530)

Jeffrey Morgan 10 months ago
parent
commit
571dc61955
2 changed files with 6 additions and 6 deletions
  1. +1 -1
      llm/llama.cpp
  2. +5 -5
      llm/patches/05-default-pretokenizer.diff

+ 1 - 1
llm/llama.cpp

@@ -1 +1 @@
-Subproject commit d7fd29fff16456ce9c3a23fd2d09a66256b05aff
+Subproject commit a8db2a9ce64cd4417f6a312ab61858f17f0f8584

+ 5 - 5
llm/patches/05-default-pretokenizer.diff

@@ -1,11 +1,11 @@
 diff --git a/src/llama.cpp b/src/llama.cpp
-index 73f52435..2b81b4bd 100644
+index 2b9ace28..172640e2 100644
 --- a/src/llama.cpp
 +++ b/src/llama.cpp
-@@ -5092,16 +5092,7 @@ static void llm_load_vocab(
- 
-         // for now, only BPE models have pre-tokenizers
+@@ -5357,16 +5357,7 @@ static void llm_load_vocab(
          if (vocab.type == LLAMA_VOCAB_TYPE_BPE) {
+             vocab.tokenizer_add_space_prefix = false;
+             vocab.tokenizer_clean_spaces = true;
 -            if (tokenizer_pre.empty()) {
 -                LLAMA_LOG_WARN("%s: missing pre-tokenizer type, using: 'default'\n", __func__);
 -                LLAMA_LOG_WARN("%s:                                             \n", __func__);
@@ -20,7 +20,7 @@ index 73f52435..2b81b4bd 100644
                  vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
              } else if (
                      tokenizer_pre == "llama3"   ||
-@@ -5164,7 +5155,8 @@ static void llm_load_vocab(
+@@ -5439,7 +5430,8 @@ static void llm_load_vocab(
                  tokenizer_pre == "jais") {
                  vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_JAIS;
              } else {