10 months ago · 34f142797a
--- a/llm/ext_server/server.cpp
+++ b/llm/ext_server/server.cpp
@@ -835,7 +835,7 @@ struct llama_server_context
 
				         system_tokens.clear();
			
 
				 
			
 
				         if (!system_prompt.empty()) {
			
 
				-            system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token);
			
 
				+            system_tokens = ::llama_tokenize(ctx, system_prompt, true);
			
 
				 
			
 
				             llama_batch_clear(batch);
			
 
				 
			
@@ -1656,7 +1656,7 @@ struct llama_server_context
 
				                     slot.t_start_process_prompt = ggml_time_us();
			
 
				                     slot.t_start_genereration = 0;
			
 
				 
			
 
				-                    prompt_tokens = tokenize(slot.prompt, system_prompt.empty() && add_bos_token);  // add BOS if there isn't system prompt
			
 
				+                    prompt_tokens = tokenize(slot.prompt, system_prompt.empty());  // add BOS if there isn't system prompt
			
 
				 
			
 
				                     slot.n_prompt_tokens = prompt_tokens.size();