02-shutdown.diff 2.6 KB

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 11dd82c3..311495a8 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -28,6 +28,7 @@
 #include <chrono>
 #include <condition_variable>
 #include <atomic>
+#include <signal.h>
 
 using json = nlohmann::json;
 
@@ -2394,6 +2395,9 @@ static void append_to_generated_text_from_generated_token_probs(llama_server_con
     }
 }
 
+std::function<void(int)> shutdown_handler;
+inline void signal_handler(int signal) { shutdown_handler(signal); }
+
 int main(int argc, char **argv)
 {
 #if SERVER_VERBOSE != 1
@@ -3014,8 +3018,14 @@ int main(int argc, char **argv)
         std::placeholders::_2,
         std::placeholders::_3
     ));
-    llama.queue_tasks.start_loop();
+    shutdown_handler = [&](int) {
+        llama.queue_tasks.terminate();
+    };
+    signal(SIGTERM, signal_handler);
+    signal(SIGINT, signal_handler);
+    llama.queue_tasks.start_loop();
+    svr.stop();
     t.join();
 
     llama_backend_free();
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index 70cce072..2acb1eab 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -6,6 +6,7 @@
 #include <mutex>
 #include <condition_variable>
 #include <unordered_map>
+#include <atomic>
 
 #include "json.hpp"
 
@@ -190,6 +191,7 @@ inline std::string format_chatml(std::vector<json> messages)
 struct llama_server_queue {
     int id = 0;
     std::mutex mutex_tasks;
+    std::atomic<bool> running;
 
     // queues
     std::vector<task_server> queue_tasks;
     std::vector<task_server> queue_tasks_deferred;
@@ -248,9 +250,15 @@ struct llama_server_queue {
         queue_tasks_deferred.clear();
     }
 
-    // Start the main loop. This call is blocking
-    [[noreturn]]
+    // end the start_loop routine
+    void terminate() {
+        running = false;
+        condition_tasks.notify_all();
+    }
+
+    // Start the main loop.
     void start_loop() {
+        running = true;
         while (true) {
             // new task arrived
             LOG_VERBOSE("have new task", {});
@@ -294,8 +302,12 @@ struct llama_server_queue {
             {
                 std::unique_lock<std::mutex> lock(mutex_tasks);
                 if (queue_tasks.empty()) {
+                    if (!running.load()) {
+                        LOG_VERBOSE("ending start_loop", {});
+                        return;
+                    }
                     condition_tasks.wait(lock, [&]{
-                        return !queue_tasks.empty();
+                        return (!queue_tasks.empty() || !running.load());
                     });
                 }
             }