123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596 |
- diff --git a/examples/server/server.cpp b/examples/server/server.cpp
- index a0b46970..7800c6e7 100644
- --- a/examples/server/server.cpp
- +++ b/examples/server/server.cpp
- @@ -28,6 +28,7 @@
- #include <chrono>
- #include <condition_variable>
- #include <atomic>
- +#include <signal.h>
-
- using json = nlohmann::json;
-
- @@ -2511,6 +2512,9 @@ static void append_to_generated_text_from_generated_token_probs(llama_server_con
- }
- }
-
- +std::function<void(int)> shutdown_handler;
- +inline void signal_handler(int signal) { shutdown_handler(signal); }
- +
- int main(int argc, char **argv)
- {
- #if SERVER_VERBOSE != 1
- @@ -3128,8 +3132,25 @@ int main(int argc, char **argv)
- std::placeholders::_2,
- std::placeholders::_3
- ));
- - llama.queue_tasks.start_loop();
-
- + shutdown_handler = [&](int) {
- + llama.queue_tasks.terminate();
- + };
- +
- +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
- + struct sigaction sigint_action;
- + sigint_action.sa_handler = signal_handler;
- + sigemptyset (&sigint_action.sa_mask);
- + sigint_action.sa_flags = 0;
- + sigaction(SIGINT, &sigint_action, NULL);
- +#elif defined (_WIN32)
- + auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
- + return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
- + };
- + SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
- +#endif
- + llama.queue_tasks.start_loop();
- + svr.stop();
- t.join();
-
- llama_backend_free();
- diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
- index 54854896..0ee670db 100644
- --- a/examples/server/utils.hpp
- +++ b/examples/server/utils.hpp
- @@ -220,6 +220,7 @@ inline std::string format_chatml(std::vector<json> messages)
- struct llama_server_queue {
- int id = 0;
- std::mutex mutex_tasks;
- + bool running;
- // queues
- std::vector<task_server> queue_tasks;
- std::vector<task_server> queue_tasks_deferred;
- @@ -278,9 +279,18 @@ struct llama_server_queue {
- queue_tasks_deferred.clear();
- }
-
- - // Start the main loop. This call is blocking
- - [[noreturn]]
- + // end the start_loop routine
- + void terminate() {
- + {
- + std::unique_lock<std::mutex> lock(mutex_tasks);
- + running = false;
- + }
- + condition_tasks.notify_all();
- + }
- +
- + // Start the main loop.
- void start_loop() {
- + running = true;
- while (true) {
- // new task arrived
- LOG_VERBOSE("have new task", {});
- @@ -324,8 +334,12 @@ struct llama_server_queue {
- {
- std::unique_lock<std::mutex> lock(mutex_tasks);
- if (queue_tasks.empty()) {
- + if (!running) {
- + LOG_VERBOSE("ending start_loop", {});
- + return;
- + }
- condition_tasks.wait(lock, [&]{
- - return !queue_tasks.empty();
- + return (!queue_tasks.empty() || !running);
- });
- }
- }
|