response.py

import json
from uuid import uuid4

from open_webui.utils.misc import (
    openai_chat_chunk_message_template,
    openai_chat_completion_message_template,
)
def convert_ollama_usage_to_openai(data: dict) -> dict:
    """Map Ollama's timing/count fields onto an OpenAI-style usage block.

    Ollama reports every duration in nanoseconds, so tokens-per-second is
    count / (duration / 1e9).
    """
    return {
        "response_token/s": (
            round(
                data.get("eval_count", 0)
                / (data.get("eval_duration", 0) / 1_000_000_000),
                2,
            )
            if data.get("eval_duration", 0) > 0
            else "N/A"
        ),
        "prompt_token/s": (
            round(
                data.get("prompt_eval_count", 0)
                / (data.get("prompt_eval_duration", 0) / 1_000_000_000),
                2,
            )
            if data.get("prompt_eval_duration", 0) > 0
            else "N/A"
        ),
        "total_duration": data.get("total_duration", 0),
        "load_duration": data.get("load_duration", 0),
        "prompt_eval_count": data.get("prompt_eval_count", 0),
        "prompt_eval_duration": data.get("prompt_eval_duration", 0),
        "eval_count": data.get("eval_count", 0),
        "eval_duration": data.get("eval_duration", 0),
        # Human-readable wall-clock total, e.g. "0h0m4s".
        "approximate_total": (lambda s: f"{s // 3600}h{(s % 3600) // 60}m{s % 60}s")(
            (data.get("total_duration", 0) or 0) // 1_000_000_000
        ),
    }


def convert_response_ollama_to_openai(ollama_response: dict) -> dict:
    model = ollama_response.get("model", "ollama")
    message_content = ollama_response.get("message", {}).get("content", "")

    usage = convert_ollama_usage_to_openai(ollama_response)
    return openai_chat_completion_message_template(model, message_content, usage)
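# Illustrative only (values invented): a final Ollama payload such as
#
#     {
#         "model": "llama3",
#         "message": {"role": "assistant", "content": "Hello!"},
#         "done": True,
#         "eval_count": 25,
#         "eval_duration": 500_000_000,  # 0.5 s in nanoseconds
#     }
#
# converts to an OpenAI-style completion whose usage block reports
# "response_token/s": 50.0 (25 tokens / 0.5 s).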
async def convert_streaming_response_ollama_to_openai(ollama_streaming_response):
    async for data in ollama_streaming_response.body_iterator:
        data = json.loads(data)

        model = data.get("model", "ollama")
        message_content = data.get("message", {}).get("content", "")
        tool_calls = data.get("message", {}).get("tool_calls", None)

        openai_tool_calls = None
        if tool_calls:
            openai_tool_calls = []
            for tool_call in tool_calls:
                openai_tool_call = {
                    "index": tool_call.get("index", 0),
                    "id": tool_call.get("id", f"call_{str(uuid4())}"),
                    "type": "function",
                    "function": {
                        "name": tool_call.get("function", {}).get("name", ""),
                        # OpenAI delivers arguments as a JSON-encoded string,
                        # while Ollama sends a plain object.
                        "arguments": json.dumps(
                            tool_call.get("function", {}).get("arguments", {})
                        ),
                    },
                }
                openai_tool_calls.append(openai_tool_call)

        done = data.get("done", False)

        # Ollama only reports timing/count stats on the final chunk, so the
        # usage block is attached once `done` is true.
        usage = convert_ollama_usage_to_openai(data) if done else None

        data = openai_chat_chunk_message_template(
            model, message_content if not done else None, openai_tool_calls, usage
        )

        # Emit each chunk as a server-sent event, then close the stream the
        # way OpenAI does.
        yield f"data: {json.dumps(data)}\n\n"

    yield "data: [DONE]\n\n"
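

# Minimal self-check sketch, not part of the original module: the sample
# payload is invented, and a SimpleNamespace stands in for the upstream
# streaming response, of which the converter only touches `body_iterator`.
if __name__ == "__main__":
    import asyncio
    from types import SimpleNamespace

    sample = {
        "model": "llama3",
        "message": {"role": "assistant", "content": "Hello!"},
        "done": True,
        "total_duration": 4_000_000_000,  # nanoseconds
        "prompt_eval_count": 10,
        "prompt_eval_duration": 500_000_000,
        "eval_count": 25,
        "eval_duration": 500_000_000,  # 25 tokens / 0.5 s -> 50.0 tokens/s
    }
    print(convert_response_ollama_to_openai(sample))

    async def _chunks():
        # Yield one serialized chunk, mimicking a body_iterator.
        yield json.dumps(sample)

    async def _drain():
        fake = SimpleNamespace(body_iterator=_chunks())
        async for line in convert_streaming_response_ollama_to_openai(fake):
            print(line, end="")

    asyncio.run(_drain())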