01-cache.diff 654 B

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 2b2f4a0f..afac49af 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -997,13 +997,15 @@ struct llama_server_context
             slot.n_sent_text += result.text_to_send.size();
             // add the token to slot queue and cache
         }
-        slot.add_token_string(result);
+
         if (slot.params.stream)
        {
             send_partial_response(slot, result);
         }
     }
+    slot.add_token_string(result);
+
     if (incomplete)
     {
         slot.has_next_token = true;