@@ -1032,7 +1032,7 @@ struct llama_server_context
             slot.has_next_token = false;
         }
 
-        if (!slot.cache_tokens.empty() && result.tok == llama_token_eos(model))
+        if (llama_token_is_eog(model, result.tok))
         {
             slot.stopped_eos = true;
             slot.has_next_token = false;
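The first hunk widens the stop condition: instead of comparing the sampled token against the single `llama_token_eos(model)` id (and only when the cache is non-empty), the slot now stops on any end-of-generation token via `llama_token_is_eog()`, which also covers end-of-turn tokens such as `<|eot_id|>` in Llama 3 models. A minimal sketch of that check in isolation (the helper name is illustrative, not part of the server):

```cpp
// Sketch only: decide whether a sampled token should end generation.
// Assumes llama.h from a llama.cpp revision that provides llama_token_is_eog().
#include "llama.h"

static bool token_ends_generation(const llama_model * model, llama_token tok) {
    // true for EOS, EOT, or any other token the model marks as end-of-generation
    return llama_token_is_eog(model, tok);
}
```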
@@ -1144,12 +1144,15 @@ struct llama_server_context
 
         res.result_json = json
         {
-            {"content", tkn.text_to_send},
             {"stop", false},
             {"slot_id", slot.id},
             {"multimodal", multimodal}
         };
 
+        if (!llama_token_is_eog(model, tkn.tok)) {
+            res.result_json["content"] = tkn.text_to_send;
+        }
+
         if (slot.sparams.n_probs > 0)
         {
             std::vector<completion_token_output> probs_output = {};
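The second hunk applies the same check to streamed partial responses: the `content` field is attached only when the token is not an end-of-generation token, so the textual form of EOG tokens is never forwarded to the client. A rough standalone sketch of that pattern, assuming nlohmann::json (the `json` type used by the server); the function and its parameters are illustrative, while the field names mirror the server's existing partial-response JSON:

```cpp
// Sketch only: build a streamed response chunk, omitting "content" for EOG tokens.
#include <string>
#include <nlohmann/json.hpp>
#include "llama.h"

using json = nlohmann::json;

static json make_chunk(const llama_model * model, llama_token tok,
                       const std::string & text_to_send, int slot_id) {
    json chunk = {
        {"stop",    false},
        {"slot_id", slot_id}
    };
    // Forward text only for regular tokens; EOG tokens carry no user-visible content.
    if (!llama_token_is_eog(model, tok)) {
        chunk["content"] = text_to_send;
    }
    return chunk;
}
```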