
llm: always add bos token to prompt (#4941)

* fix embeddings by porting fixes from llama.cpp upstream

* remove assert

---------

Co-authored-by: Jesper Ek <deadbeef84@gmail.com>
Jeffrey Morgan, 10 months ago
commit 34f142797a

1 changed file with 2 additions and 2 deletions:
    llm/ext_server/server.cpp
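
The two hunks below stop deferring the BOS decision to the model's add_bos_token metadata: the system prompt is now always tokenized with a leading BOS, and the per-slot prompt receives one only when no system prompt exists. A minimal sketch of the resulting behavior follows, using a hypothetical tokenize_stub in place of the real ::llama_tokenize helper; the BOS id of 1 and the per-byte "tokens" are illustrative assumptions, not ollama or llama.cpp APIs.

    #include <cstdio>
    #include <string>
    #include <vector>

    static const int BOS_ID = 1; // assumed LLaMA-family BOS id, illustration only

    // Stand-in for llama.cpp tokenization: prepends BOS when asked, then
    // emits fake per-byte tokens. Not the real ::llama_tokenize.
    static std::vector<int> tokenize_stub(const std::string & text, bool add_bos) {
        std::vector<int> toks;
        if (add_bos) {
            toks.push_back(BOS_ID);
        }
        for (unsigned char c : text) {
            toks.push_back(256 + c); // offset past special token ids
        }
        return toks;
    }

    int main() {
        const std::string system_prompt = "";    // no system prompt configured
        const std::string user_prompt   = "hello";

        // Mirrors the patched call sites: the system prompt (when present)
        // always gets BOS; the slot prompt gets BOS only if the system
        // prompt is empty, so exactly one BOS leads the final sequence.
        std::vector<int> system_tokens =
            system_prompt.empty() ? std::vector<int>{}
                                  : tokenize_stub(system_prompt, /*add_bos=*/true);
        std::vector<int> prompt_tokens =
            tokenize_stub(user_prompt, /*add_bos=*/system_prompt.empty());

        std::printf("system tokens: %zu, first prompt token: %d (%s)\n",
                    system_tokens.size(), prompt_tokens.front(),
                    prompt_tokens.front() == BOS_ID ? "BOS" : "not BOS");
        return 0;
    }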

llm/ext_server/server.cpp (+2 −2)

@@ -835,7 +835,7 @@ struct llama_server_context
         system_tokens.clear();
 
         if (!system_prompt.empty()) {
-            system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token);
+            system_tokens = ::llama_tokenize(ctx, system_prompt, true);
 
             llama_batch_clear(batch);
 
@@ -1656,7 +1656,7 @@ struct llama_server_context
                     slot.t_start_process_prompt = ggml_time_us();
                     slot.t_start_genereration = 0;
 
-                    prompt_tokens = tokenize(slot.prompt, system_prompt.empty() && add_bos_token);  // add BOS if there isn't system prompt
+                    prompt_tokens = tokenize(slot.prompt, system_prompt.empty());  // add BOS if there isn't system prompt
 
                     slot.n_prompt_tokens = prompt_tokens.size();
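
Taken together, the two hunks guarantee exactly one leading BOS per sequence regardless of the model's add_bos_token setting: the first always adds it when tokenizing a non-empty system prompt, and the second adds it to the slot prompt only when there is no system prompt already carrying it.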