0001-add-detokenize-endpoint.patch 1.7 KB

From 032ef7ff2423f5117bb59d42fb71be9cebf0a2de Mon Sep 17 00:00:00 2001
From: Bruce MacDonald <brucewmacdonald@gmail.com>
Date: Mon, 28 Aug 2023 18:08:12 -0400
Subject: [PATCH] add detokenize endpoint

---
 examples/server/server.cpp | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 9966045..5014691 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1075,6 +1075,12 @@ static json format_tokenizer_response(const std::vector<llama_token> &tokens)
         {"tokens", tokens}};
 }
 
+static json format_detokenized_response(std::string content)
+{
+    return json{
+        {"content", content}};
+}
+
 static void parse_options_completion(const json &body, llama_server_context &llama)
 {
     gpt_params default_params;
@@ -1361,6 +1367,21 @@ int main(int argc, char **argv)
                  const json data = format_tokenizer_response(tokens);
                  return res.set_content(data.dump(), "application/json"); });
 
+    svr.Post("/detokenize", [&llama](const Request &req, Response &res)
+             {
+                 auto lock = llama.lock();
+
+                 const json body = json::parse(req.body);
+                 std::string content;
+                 if (body.count("tokens") != 0)
+                 {
+                     const std::vector<llama_token> tokens = body["tokens"];
+                     content = tokens_to_str(llama.ctx, tokens.cbegin(), tokens.cend());
+                 }
+
+                 const json data = format_detokenized_response(content);
+                 return res.set_content(data.dump(), "application/json"); });
+
     svr.Post("/embedding", [&llama](const Request &req, Response &res)
              {
                  auto lock = llama.lock();
-- 
2.39.2 (Apple Git-143)
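
For reference, a quick client-side check of the new route. This is a minimal sketch, not part of the patch: it assumes the example server is running on its default 127.0.0.1:8080, that the same cpp-httplib (httplib.h) and nlohmann/json (json.hpp) single headers server.cpp builds against are on the include path, and that the token IDs in the request body are arbitrary placeholders whose meaning depends on the loaded model's vocabulary.

#include <iostream>
#include "httplib.h"
#include "json.hpp"

using json = nlohmann::json;

int main()
{
    // Assumed default host/port of the example server; adjust to your setup.
    httplib::Client cli("127.0.0.1", 8080);

    // The endpoint reads an optional "tokens" array from the JSON body.
    // These IDs are placeholders; real values come from POST /tokenize.
    const json body = {{"tokens", {1, 15043, 2787}}};

    auto res = cli.Post("/detokenize", body.dump(), "application/json");
    if (!res || res->status != 200)
    {
        std::cerr << "detokenize request failed" << std::endl;
        return 1;
    }

    // format_detokenized_response wraps the text as {"content": "..."}.
    const json data = json::parse(res->body);
    std::cout << data["content"].get<std::string>() << std::endl;
    return 0;
}

Note that the handler mirrors the existing /tokenize route: it takes the server lock, parses the request body, and falls back to an empty "content" string when no "tokens" field is supplied.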