02-shutdown.diff

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 11dd82c3..311495a8 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -28,6 +28,7 @@
 #include <chrono>
 #include <condition_variable>
 #include <atomic>
+#include <signal.h>

 using json = nlohmann::json;

@@ -2394,6 +2395,9 @@ static void append_to_generated_text_from_generated_token_probs(llama_server_con
     }
 }

+std::function<void(int)> shutdown_handler;
+inline void signal_handler(int signal) { shutdown_handler(signal); }
+
 int main(int argc, char **argv)
 {
 #if SERVER_VERBOSE != 1
@@ -3014,8 +3018,14 @@ int main(int argc, char **argv)
         std::placeholders::_2,
         std::placeholders::_3
     ));
-    llama.queue_tasks.start_loop();
+    shutdown_handler = [&](int) {
+        llama.queue_tasks.terminate();
+    };
+    signal(SIGTERM, signal_handler);
+    signal(SIGINT, signal_handler);
+    llama.queue_tasks.start_loop();
+    svr.stop();

     t.join();

     llama_backend_free();
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index 70cce072..9124869a 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -190,6 +190,7 @@ inline std::string format_chatml(std::vector<json> messages)
 struct llama_server_queue {
     int id = 0;
     std::mutex mutex_tasks;
+    bool running;
     // queues
     std::vector<task_server> queue_tasks;
     std::vector<task_server> queue_tasks_deferred;
@@ -248,9 +249,18 @@ struct llama_server_queue {
         queue_tasks_deferred.clear();
     }

-    // Start the main loop. This call is blocking
-    [[noreturn]]
+    // end the start_loop routine
+    void terminate() {
+        {
+            std::unique_lock<std::mutex> lock(mutex_tasks);
+            running = false;
+        }
+        condition_tasks.notify_all();
+    }
+
+    // Start the main loop.
     void start_loop() {
+        running = true;
         while (true) {
             // new task arrived
             LOG_VERBOSE("have new task", {});
@@ -294,8 +304,12 @@ struct llama_server_queue {
             {
                 std::unique_lock<std::mutex> lock(mutex_tasks);
                 if (queue_tasks.empty()) {
+                    if (!running) {
+                        LOG_VERBOSE("ending start_loop", {});
+                        return;
+                    }
                     condition_tasks.wait(lock, [&]{
-                        return !queue_tasks.empty();
+                        return (!queue_tasks.empty() || !running);
                     });
                 }
             }
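
Taken together, the two hunks implement one pattern: signal() only accepts a plain function pointer, so a global std::function trampoline forwards SIGTERM/SIGINT into a capturing lambda; that lambda flips a running flag under the queue mutex and wakes the blocked loop through the condition variable, whose wait predicate now also checks !running, letting start_loop() return instead of blocking forever. Below is a minimal self-contained sketch of that pattern; task_queue, push(), and the main() scaffolding are illustrative stand-ins, not code from the server.

#include <condition_variable>
#include <csignal>
#include <cstdio>
#include <functional>
#include <mutex>
#include <vector>

// signal() takes a plain function pointer, so a global std::function
// acts as a trampoline into a capturing lambda.
std::function<void(int)> shutdown_handler;
void signal_handler(int sig) { shutdown_handler(sig); }

struct task_queue {
    std::mutex              mutex_tasks;
    std::condition_variable condition_tasks;
    std::vector<int>        queue_tasks;
    bool                    running = false;

    void push(int task) {
        {
            std::unique_lock<std::mutex> lock(mutex_tasks);
            queue_tasks.push_back(task);
        }
        condition_tasks.notify_one();
    }

    // Flip the flag under the mutex, then wake the loop so it can exit.
    void terminate() {
        {
            std::unique_lock<std::mutex> lock(mutex_tasks);
            running = false;
        }
        condition_tasks.notify_all();
    }

    // Blocks until terminate() is called and the queue has drained.
    void start_loop() {
        running = true;
        while (true) {
            std::unique_lock<std::mutex> lock(mutex_tasks);
            if (queue_tasks.empty()) {
                if (!running) {
                    return; // shutdown requested, nothing left to do
                }
                // Wake on a new task OR a shutdown request.
                condition_tasks.wait(lock, [&] {
                    return !queue_tasks.empty() || !running;
                });
                if (queue_tasks.empty()) {
                    return; // woke because of !running
                }
            }
            int task = queue_tasks.back();
            queue_tasks.pop_back();
            lock.unlock();
            std::printf("processed task %d\n", task);
        }
    }
};

int main() {
    task_queue queue;
    shutdown_handler = [&](int) { queue.terminate(); };
    std::signal(SIGTERM, signal_handler);
    std::signal(SIGINT,  signal_handler);

    queue.push(1);
    queue.push(2);
    queue.start_loop(); // returns after SIGINT/SIGTERM
    std::printf("clean exit\n");
    return 0;
}

One caveat the sketch inherits from the patch: locking a mutex and notifying a condition variable inside a signal handler are not async-signal-safe. It works in practice for this shutdown path, but the conservative alternative is to set a volatile std::sig_atomic_t flag in the handler and have the loop poll it, e.g. via wait_for with a timeout.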