
bump (#4597)

Michael Yang, 11 months ago
commit 714adb8bd1

llm/ext_server/server.cpp (+2 -2)

@@ -738,7 +738,7 @@ struct llama_server_context
                     sampler_names.emplace_back(sampler_name);
                 }
             }
-            slot->sparams.samplers_sequence = sampler_types_from_names(sampler_names, false);
+            slot->sparams.samplers_sequence = llama_sampling_types_from_names(sampler_names, false);
         }
         else
         {
@@ -1096,7 +1096,7 @@ struct llama_server_context
         std::vector<std::string> samplers_sequence;
         for (const auto &sampler_type : slot.sparams.samplers_sequence)
         {
-            samplers_sequence.emplace_back(sampler_type_to_name_string(sampler_type));
+            samplers_sequence.emplace_back(llama_sampling_type_to_str(sampler_type));
         }
 
         return json {
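
The two server.cpp hunks above only track a rename in llama.cpp's common sampling helpers: sampler_types_from_names became llama_sampling_types_from_names, and sampler_type_to_name_string became llama_sampling_type_to_str. A minimal round-trip sketch of how the server uses the pair, assuming the post-rename declarations in common/sampling.h at the revision this bump picks up (the allow_alt_names flag and the llama_sampler_type enum are assumptions from that header; verify against your checkout):

    // Parse user-supplied sampler names into types, then convert back to
    // canonical strings, mirroring the JSON the server returns for a
    // slot's sampling settings.
    #include <string>
    #include <vector>
    #include "sampling.h" // llama.cpp common headers (assumed layout)

    std::vector<std::string> canonical_sampler_names(const std::vector<std::string> & raw) {
        // false: do not accept alternative spellings, matching the
        // server call site above.
        const std::vector<llama_sampler_type> types =
            llama_sampling_types_from_names(raw, /*allow_alt_names=*/false);

        std::vector<std::string> out;
        out.reserve(types.size());
        for (const llama_sampler_type t : types) {
            out.emplace_back(llama_sampling_type_to_str(t));
        }
        return out;
    }

Since the parser only keeps names it recognizes, the round trip doubles as validation of a user-supplied samplers list.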

llm/llama.cpp (+1 -1)

@@ -1 +1 @@
-Subproject commit 614d3b914e1c3e02596f869649eb4f1d3b68614d
+Subproject commit 74f33adf5f8b20b08fc5a6aa17ce081abe86ef2f

llm/patches/03-load_exception.diff (+18 -5)

@@ -1,8 +1,17 @@
+From 544a2d2e646d39e878d87dfbb3398a356bc560ab Mon Sep 17 00:00:00 2001
+From: Michael Yang <mxyng@pm.me>
+Date: Thu, 23 May 2024 11:18:45 -0700
+Subject: [PATCH] throw exception on load errors
+
+---
+ llama.cpp | 25 ++++++++++++++++---------
+ 1 file changed, 16 insertions(+), 9 deletions(-)
+
 diff --git a/llama.cpp b/llama.cpp
-index 4225f955..7b762f86 100644
+index 15c66077..8ba90b6a 100644
 --- a/llama.cpp
 +++ b/llama.cpp
-@@ -4756,7 +4756,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
+@@ -6346,7 +6346,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
          }
      } catch (const std::exception & err) {
          LLAMA_LOG_ERROR("%s: error loading model: %s\n", __func__, err.what());
@@ -11,10 +20,10 @@ index 4225f955..7b762f86 100644
      }
  
      return 0;
-@@ -12102,16 +12102,22 @@ struct llama_model * llama_load_model_from_file(
-         };
+@@ -15600,16 +15600,23 @@ struct llama_model * llama_load_model_from_file(
+         }
+         model->rpc_servers.push_back(servers);
      }
- 
 -    int status = llama_model_load(path_model, *model, params);
 -    GGML_ASSERT(status <= 0);
 -    if (status < 0) {
@@ -22,6 +31,7 @@ index 4225f955..7b762f86 100644
 -            LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
 -        } else if (status == -2) {
 -            LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
++
 +    try {
 +        int status = llama_model_load(path_model, *model, params);
 +        GGML_ASSERT(status <= 0);
@@ -42,3 +52,6 @@ index 4225f955..7b762f86 100644
      }
  
      return model;
+-- 
+2.45.1
+
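
Per its subject line, the reworked 03-load_exception.diff makes load errors propagate as C++ exceptions: the inner patch rethrows out of llama_model_load and wraps the status handling in llama_load_model_from_file in a try block, so the caller receives the original error message instead of a bare null pointer with the cause buried in the log. A sketch of the caller-side pattern this enables; the error handling below is illustrative, not the patch's exact code:

    // Illustrative caller: with the patch applied, a corrupt or
    // unsupported model file surfaces as std::exception out of
    // llama_load_model_from_file, so the caller can capture err.what()
    // for its own status reporting.
    #include <cstdio>
    #include <exception>
    #include "llama.h"

    llama_model * try_load(const char * path, llama_model_params params) {
        try {
            return llama_load_model_from_file(path, params);
        } catch (const std::exception & err) {
            // Report the underlying cause and degrade gracefully.
            fprintf(stderr, "model load failed: %s\n", err.what());
            return nullptr;
        }
    }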

llm/patches/05-default-pretokenizer.diff (+35 -0)

@@ -0,0 +1,35 @@
+From d02a06f3f45a09255ace8684a66590e06ce44605 Mon Sep 17 00:00:00 2001
+From: Michael Yang <mxyng@pm.me>
+Date: Thu, 23 May 2024 11:33:20 -0700
+Subject: [PATCH] default pretokenizer on unrecognized type
+
+---
+ llama.cpp | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/llama.cpp b/llama.cpp
+index 15c66077..af1aede3 100644
+--- a/llama.cpp
++++ b/llama.cpp
+@@ -4504,9 +4504,6 @@ static void llm_load_vocab(
+                 LLAMA_LOG_WARN("%s: ************************************        \n", __func__);
+                 LLAMA_LOG_WARN("%s:                                             \n", __func__);
+                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+-            } else if (
+-                    tokenizer_pre == "default") {
+-                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+             } else if (
+                     tokenizer_pre == "llama3"   ||
+                     tokenizer_pre == "llama-v3" ||
+@@ -4553,7 +4550,7 @@ static void llm_load_vocab(
+                 tokenizer_pre == "dbrx") {
+                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DBRX;
+             } else {
+-                throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
++                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+             }
+         } else {
+             vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+-- 
+2.45.1
+
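
The new 05-default-pretokenizer.diff changes llm_load_vocab so that an unrecognized tokenizer.ggml.pre value falls back to LLAMA_VOCAB_PRE_TYPE_DEFAULT instead of throwing, and drops the explicit "default" branch that the fallback now subsumes. A condensed sketch of the resulting control flow; the real code is one long if/else chain, and the enum subset here is illustrative:

    // Condensed shape of the patched lookup in llm_load_vocab.
    #include <string>

    // Illustrative subset of llama.cpp's pre-tokenizer enum.
    enum llama_vocab_pre_type {
        LLAMA_VOCAB_PRE_TYPE_DEFAULT,
        LLAMA_VOCAB_PRE_TYPE_LLAMA3,
        LLAMA_VOCAB_PRE_TYPE_DBRX,
    };

    static llama_vocab_pre_type pre_type_from_name(const std::string & tokenizer_pre) {
        if (tokenizer_pre == "llama3" || tokenizer_pre == "llama-v3" || tokenizer_pre == "llama-bpe") {
            return LLAMA_VOCAB_PRE_TYPE_LLAMA3;
        }
        if (tokenizer_pre == "dbrx") {
            return LLAMA_VOCAB_PRE_TYPE_DBRX;
        }
        // Patched behavior: unknown names (including "default") fall back
        // to the default pre-tokenizer rather than failing the load.
        // Upstream instead threw: "unknown pre-tokenizer type: '%s'".
        return LLAMA_VOCAB_PRE_TYPE_DEFAULT;
    }

The trade-off is that a model converted with a genuinely new pre-tokenizer will still load but may tokenize incorrectly, which is why the loud LLAMA_LOG_WARN banner visible in the patch context remains in place.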