response.py

import json

from open_webui.utils.misc import (
    openai_chat_chunk_message_template,
    openai_chat_completion_message_template,
)
def convert_response_ollama_to_openai(ollama_response: dict) -> dict:
    model = ollama_response.get("model", "ollama")
    message_content = ollama_response.get("message", {}).get("content", "")
    data = ollama_response

    # Ollama reports durations in nanoseconds; dividing the duration by
    # 10_000_000 and then multiplying the ratio by 100 is equivalent to
    # converting the duration to seconds, i.e. tokens per second.
    usage = {
        "response_token/s": (
            round(
                (
                    (
                        data.get("eval_count", 0)
                        / ((data.get("eval_duration", 0) / 10_000_000))
                    )
                    * 100
                ),
                2,
            )
            if data.get("eval_duration", 0) > 0
            else "N/A"
        ),
        "prompt_token/s": (
            round(
                (
                    (
                        data.get("prompt_eval_count", 0)
                        / ((data.get("prompt_eval_duration", 0) / 10_000_000))
                    )
                    * 100
                ),
                2,
            )
            if data.get("prompt_eval_duration", 0) > 0
            else "N/A"
        ),
        "total_duration": data.get("total_duration", 0),
        "load_duration": data.get("load_duration", 0),
        "prompt_eval_count": data.get("prompt_eval_count", 0),
        "prompt_eval_duration": data.get("prompt_eval_duration", 0),
        "eval_count": data.get("eval_count", 0),
        "eval_duration": data.get("eval_duration", 0),
        # Human-readable wall-clock estimate (e.g. "0h0m2s") derived from the
        # total duration in nanoseconds.
        "approximate_total": (lambda s: f"{s // 3600}h{(s % 3600) // 60}m{s % 60}s")(
            (data.get("total_duration", 0) or 0) // 1_000_000_000
        ),
    }

    response = openai_chat_completion_message_template(model, message_content, usage)
    return response
async def convert_streaming_response_ollama_to_openai(ollama_streaming_response):
    async for data in ollama_streaming_response.body_iterator:
        data = json.loads(data)

        model = data.get("model", "ollama")
        message_content = data.get("message", {}).get("content", "")
        done = data.get("done", False)

        # Only the final chunk ("done": True) carries usage statistics;
        # intermediate chunks carry content deltas only.
        usage = None
        if done:
            usage = {
                "response_token/s": (
                    round(
                        (
                            (
                                data.get("eval_count", 0)
                                / ((data.get("eval_duration", 0) / 10_000_000))
                            )
                            * 100
                        ),
                        2,
                    )
                    if data.get("eval_duration", 0) > 0
                    else "N/A"
                ),
                "prompt_token/s": (
                    round(
                        (
                            (
                                data.get("prompt_eval_count", 0)
                                / ((data.get("prompt_eval_duration", 0) / 10_000_000))
                            )
                            * 100
                        ),
                        2,
                    )
                    if data.get("prompt_eval_duration", 0) > 0
                    else "N/A"
                ),
                "total_duration": data.get("total_duration", 0),
                "load_duration": data.get("load_duration", 0),
                "prompt_eval_count": data.get("prompt_eval_count", 0),
                "prompt_eval_duration": data.get("prompt_eval_duration", 0),
                "eval_count": data.get("eval_count", 0),
                "eval_duration": data.get("eval_duration", 0),
                "approximate_total": (
                    lambda s: f"{s // 3600}h{(s % 3600) // 60}m{s % 60}s"
                )((data.get("total_duration", 0) or 0) // 1_000_000_000),
            }

        data = openai_chat_chunk_message_template(
            model, message_content if not done else None, usage
        )

        # Emit each chunk as a server-sent event; the stream is terminated
        # with a "[DONE]" sentinel, as in the OpenAI streaming format.
        line = f"data: {json.dumps(data)}\n\n"
        yield line

    yield "data: [DONE]\n\n"
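

# Illustrative usage sketch (not part of the upstream module). The payloads
# below are hypothetical Ollama /api/chat responses with made-up values;
# durations are in nanoseconds, matching what Ollama reports.
if __name__ == "__main__":
    import asyncio
    from types import SimpleNamespace

    # Non-streaming conversion.
    sample_response = {
        "model": "llama3",
        "message": {"role": "assistant", "content": "Hello!"},
        "done": True,
        "total_duration": 2_500_000_000,
        "load_duration": 100_000_000,
        "prompt_eval_count": 12,
        "prompt_eval_duration": 300_000_000,
        "eval_count": 40,
        "eval_duration": 2_000_000_000,
    }
    print(json.dumps(convert_response_ollama_to_openai(sample_response), indent=2))

    # Streaming conversion: a minimal stand-in object exposing the
    # .body_iterator attribute the converter expects.
    async def demo_stream():
        chunks = [
            json.dumps({"model": "llama3", "message": {"content": "Hel"}, "done": False}),
            json.dumps({"model": "llama3", "message": {"content": "lo"}, "done": False}),
            json.dumps(
                {
                    "model": "llama3",
                    "message": {"content": ""},
                    "done": True,
                    "eval_count": 2,
                    "eval_duration": 50_000_000,
                }
            ),
        ]

        async def iterate():
            for chunk in chunks:
                yield chunk

        fake_streaming_response = SimpleNamespace(body_iterator=iterate())
        async for line in convert_streaming_response_ollama_to_openai(
            fake_streaming_response
        ):
            print(line, end="")

    asyncio.run(demo_stream())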