01-cache.diff 653 B

  1. diff --git a/examples/server/server.cpp b/examples/server/server.cpp
  2. index d86d7e04..2694e92e 100644
  3. --- a/examples/server/server.cpp
  4. +++ b/examples/server/server.cpp
  5. @@ -901,13 +901,15 @@ struct llama_server_context
  6. slot.sent_count += result.text_to_send.size();
  7. // add the token to slot queue and cache
  8. }
  9. - slot.add_token_string(result);
  10. +
  11. if (slot.params.stream)
  12. {
  13. send_partial_response(slot, result);
  14. }
  15. }
  16. + slot.add_token_string(result);
  17. +
  18. if (incomplete)
  19. {
  20. slot.has_next_token = true;