From 032ef7ff2423f5117bb59d42fb71be9cebf0a2de Mon Sep 17 00:00:00 2001
From: Bruce MacDonald <brucewmacdonald@gmail.com>
Date: Mon, 28 Aug 2023 18:08:12 -0400
Subject: [PATCH] add detokenize endpoint

---
 examples/server/server.cpp | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 9966045..5014691 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1075,6 +1075,12 @@ static json format_tokenizer_response(const std::vector<llama_token> &tokens)
         {"tokens", tokens}};
 }
 
+static json format_detokenized_response(std::string content)
+{
+    return json{
+        {"content", content}};
+}
+
 static void parse_options_completion(const json &body, llama_server_context &llama)
 {
     gpt_params default_params;
@@ -1361,6 +1367,21 @@ int main(int argc, char **argv)
             const json data = format_tokenizer_response(tokens);
             return res.set_content(data.dump(), "application/json"); });
 
+    svr.Post("/detokenize", [&llama](const Request &req, Response &res)
+            {
+                auto lock = llama.lock();
+
+                const json body = json::parse(req.body);
+                std::string content;
+                if (body.count("tokens") != 0)
+                {
+                    const std::vector<llama_token> tokens = body["tokens"];
+                    content = tokens_to_str(llama.ctx, tokens.cbegin(), tokens.cend());
+                }
+
+                const json data = format_detokenized_response(content);
+                return res.set_content(data.dump(), "application/json"); });
+
     svr.Post("/embedding", [&llama](const Request &req, Response &res)
             {
                 auto lock = llama.lock();
-- 
2.39.2 (Apple Git-143)
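
For reference, here is a minimal sketch of exercising the new /detokenize
endpoint from a cpp-httplib client (the same HTTP library server.cpp is built
on). The host, port, and token IDs below are illustrative assumptions, not
part of the patch:

    // Sketch: POST a JSON token array to /detokenize and print the
    // {"content": "..."} reply built by format_detokenized_response.
    #include <iostream>
    #include <string>
    #include "httplib.h"

    int main() {
        // Assumption: the example server is listening on its default port.
        httplib::Client cli("localhost", 8080);

        // Assumption: placeholder token IDs; real IDs would come from /tokenize.
        const std::string body = R"({"tokens": [1, 15043, 3186]})";

        if (auto res = cli.Post("/detokenize", body, "application/json")) {
            std::cout << res->body << std::endl; // JSON object with the detokenized text
        } else {
            std::cerr << "request failed" << std::endl;
            return 1;
        }
        return 0;
    }

Note that if the request body omits "tokens", the handler in the patch
responds with an empty "content" string rather than an error.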