
Merge pull request #2377 from dhiltgen/bump_llamacpp

Bump llama.cpp to b2081
Daniel Hiltgen, 1 year ago
commit c9dfa6e571
3 changed files with 17 additions and 22 deletions
  1. llm/llama.cpp (+1 -1)
  2. llm/patches/01-cache.diff (+5 -5)
  3. llm/patches/02-shutdown.diff (+11 -16)

llm/llama.cpp (+1 -1)

@@ -1 +1 @@
-Subproject commit d2f650cb5b04ee2726663e79b47da5efe196ce00
+Subproject commit f57fadc009cbff741a1961cb7896c47d73978d2c

llm/patches/01-cache.diff (+5 -5)

@@ -1,8 +1,8 @@
 diff --git a/examples/server/server.cpp b/examples/server/server.cpp
-index a48582ad..9fffffd8 100644
+index d86d7e04..7d71c766 100644
 --- a/examples/server/server.cpp
 +++ b/examples/server/server.cpp
-@@ -1564,12 +1564,6 @@ struct llama_server_context
+@@ -1598,12 +1598,6 @@ struct llama_server_context
                          LOG_TEE("slot %d : in cache: %i tokens | to process: %i tokens\n", slot.id, slot.n_past, slot.num_prompt_tokens_processed);
                      }
  
@@ -15,7 +15,7 @@ index a48582ad..9fffffd8 100644
                      if (slot.n_past == slot.num_prompt_tokens && slot.n_past > 0)
                      {
                          // we have to evaluate at least 1 token to generate logits.
-@@ -1581,6 +1575,12 @@ struct llama_server_context
+@@ -1615,6 +1609,12 @@ struct llama_server_context
                          }
                      }
  
@@ -26,5 +26,5 @@ index a48582ad..9fffffd8 100644
 +                    slot.cache_tokens = prompt_tokens;
 +
                      LOG_VERBOSE("prompt ingested", {
-                                                     {"n_past", slot.n_past},
-                                                     {"cached", tokens_to_str(ctx, slot.cache_tokens.cbegin(), slot.cache_tokens.cbegin() + slot.n_past)},
+                                                     {"n_past",  slot.n_past},
+                                                     {"cached",  tokens_to_str(ctx, slot.cache_tokens.cbegin(), slot.cache_tokens.cbegin() + slot.n_past)},
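
The 01-cache.diff change above is a pure rebase onto the new llama.cpp commit: the index hashes, hunk offsets, and context whitespace shift, while the patch itself still does the same thing, namely setting slot.cache_tokens = prompt_tokens once the prompt is ingested so a follow-up request can reuse the already-evaluated common prefix (slot.n_past) instead of reprocessing it. A minimal standalone sketch of that prompt-cache idea, with simplified stand-in types and names rather than the actual server code:

    #include <cstdio>
    #include <vector>

    using llama_token = int; // stand-in for llama.cpp's real token type

    // Length of the prefix shared by the cached tokens and the new prompt.
    static size_t common_prefix(const std::vector<llama_token> &cache,
                                const std::vector<llama_token> &prompt) {
        size_t n = 0;
        while (n < cache.size() && n < prompt.size() && cache[n] == prompt[n])
            n++;
        return n;
    }

    int main() {
        std::vector<llama_token> cache  = {1, 42, 7, 9};     // left over from the last request
        std::vector<llama_token> prompt = {1, 42, 7, 13, 5}; // new request

        size_t n_past = common_prefix(cache, prompt); // tokens needing no re-evaluation
        cache = prompt; // as in the patch: remember the full prompt for next time

        printf("slot 0 : in cache: %zu tokens | to process: %zu tokens\n",
               n_past, prompt.size() - n_past);
        return 0;
    }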

llm/patches/02-shutdown.diff (+11 -16)

@@ -37,26 +37,18 @@ index 11dd82c3..311495a8 100644
  
      llama_backend_free();
 diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
-index 70cce072..2acb1eab 100644
+index 70cce072..9124869a 100644
 --- a/examples/server/utils.hpp
 +++ b/examples/server/utils.hpp
-@@ -6,6 +6,7 @@
- #include <mutex>
- #include <condition_variable>
- #include <unordered_map>
-+#include <atomic>
- 
- #include "json.hpp"
- 
-@@ -190,6 +191,7 @@ inline std::string format_chatml(std::vector<json> messages)
+@@ -190,6 +190,7 @@ inline std::string format_chatml(std::vector<json> messages)
  struct llama_server_queue {
      int id = 0;
      std::mutex mutex_tasks;
-+    std::atomic<bool> running;
++    bool running;
      // queues
      std::vector<task_server> queue_tasks;
      std::vector<task_server> queue_tasks_deferred;
-@@ -248,9 +250,15 @@ struct llama_server_queue {
+@@ -248,9 +249,18 @@ struct llama_server_queue {
          queue_tasks_deferred.clear();
      }
  
@@ -64,7 +56,10 @@ index 70cce072..2acb1eab 100644
 -    [[noreturn]]
 +    // end the start_loop routine
 +    void terminate() {
-+        running = false;
++        {
++            std::unique_lock<std::mutex> lock(mutex_tasks);
++            running = false;
++        }
 +        condition_tasks.notify_all();
 +    }
 +
@@ -74,17 +69,17 @@ index 70cce072..2acb1eab 100644
          while (true) {
              // new task arrived
              LOG_VERBOSE("have new task", {});
-@@ -294,8 +302,12 @@ struct llama_server_queue {
+@@ -294,8 +304,12 @@ struct llama_server_queue {
              {
                  std::unique_lock<std::mutex> lock(mutex_tasks);
                  if (queue_tasks.empty()) {
-+                    if (!running.load()) {
++                    if (!running) {
 +                        LOG_VERBOSE("ending start_loop", {});
 +                        return;
 +                    }
                      condition_tasks.wait(lock, [&]{
 -                        return !queue_tasks.empty();
-+                        return (!queue_tasks.empty() || !running.load());
++                        return (!queue_tasks.empty() || !running);
                      });
                  }
              }
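
Besides rebasing onto the new commit, the 02-shutdown.diff update simplifies the patch itself: the earlier version added an <atomic> include and made running a std::atomic<bool>, while the new version uses a plain bool that is only touched while mutex_tasks is held, with terminate() taking the lock before flipping it. A minimal standalone sketch of the resulting shutdown pattern (simplified names, and an int task standing in for task_server; not the actual llama_server_queue):

    #include <condition_variable>
    #include <cstdio>
    #include <mutex>
    #include <thread>
    #include <vector>

    struct task_queue {
        std::mutex mutex_tasks;
        std::condition_variable condition_tasks;
        std::vector<int> queue_tasks; // stand-in for std::vector<task_server>
        bool running = true;          // plain bool: only read/written under the mutex

        // End the start_loop routine: flip the flag under the lock, then wake
        // every waiter so the wait predicate is re-evaluated.
        void terminate() {
            {
                std::unique_lock<std::mutex> lock(mutex_tasks);
                running = false;
            }
            condition_tasks.notify_all();
        }

        void start_loop() {
            while (true) {
                std::unique_lock<std::mutex> lock(mutex_tasks);
                if (queue_tasks.empty()) {
                    if (!running) {
                        printf("ending start_loop\n");
                        return; // the loop is no longer [[noreturn]]
                    }
                    condition_tasks.wait(lock, [&] {
                        // the predicate runs with the lock held, so reading
                        // the plain bool here is safe
                        return !queue_tasks.empty() || !running;
                    });
                    continue; // re-check queue and flag after waking
                }
                int task = queue_tasks.back();
                queue_tasks.pop_back();
                lock.unlock();
                printf("processing task %d\n", task); // work happens outside the lock
            }
        }
    };

    int main() {
        task_queue q;
        std::thread worker([&] { q.start_loop(); });

        {
            std::unique_lock<std::mutex> lock(q.mutex_tasks);
            q.queue_tasks.push_back(1);
        }
        q.condition_tasks.notify_all();

        q.terminate(); // worker drains the queue, then exits cleanly
        worker.join();
        return 0;
    }

Guarding the flag with the same mutex the wait predicate runs under, rather than an unlocked atomic, presumably also closes a lost-wakeup window: terminate() can no longer set the flag and call notify_all() in the instant between a waiter evaluating the predicate and going to sleep.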