response.py

import json
from uuid import uuid4

from open_webui.utils.misc import (
    openai_chat_chunk_message_template,
    openai_chat_completion_message_template,
)


def convert_response_ollama_to_openai(ollama_response: dict) -> dict:
    """Convert a non-streaming Ollama chat response into the OpenAI chat completion format."""
    model = ollama_response.get("model", "ollama")
    message_content = ollama_response.get("message", {}).get("content", "")

    data = ollama_response
    usage = {
        # Ollama reports durations in nanoseconds, so tokens/s is
        # count / (duration / 1e9); the expression below is algebraically equivalent.
        "response_token/s": (
            round(
                (
                    (
                        data.get("eval_count", 0)
                        / ((data.get("eval_duration", 0) / 10_000_000))
                    )
                    * 100
                ),
                2,
            )
            if data.get("eval_duration", 0) > 0
            else "N/A"
        ),
        "prompt_token/s": (
            round(
                (
                    (
                        data.get("prompt_eval_count", 0)
                        / ((data.get("prompt_eval_duration", 0) / 10_000_000))
                    )
                    * 100
                ),
                2,
            )
            if data.get("prompt_eval_duration", 0) > 0
            else "N/A"
        ),
        "total_duration": data.get("total_duration", 0),
        "load_duration": data.get("load_duration", 0),
        "prompt_eval_count": data.get("prompt_eval_count", 0),
        "prompt_eval_duration": data.get("prompt_eval_duration", 0),
        "eval_count": data.get("eval_count", 0),
        "eval_duration": data.get("eval_duration", 0),
        # Human-readable runtime derived from total_duration (nanoseconds -> seconds).
        "approximate_total": (lambda s: f"{s // 3600}h{(s % 3600) // 60}m{s % 60}s")(
            (data.get("total_duration", 0) or 0) // 1_000_000_000
        ),
    }

    response = openai_chat_completion_message_template(model, message_content, usage)
    return response

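# Usage sketch (illustrative only, not part of the module): the payload below is a
# hypothetical non-streaming Ollama /api/chat response; field names follow Ollama's
# response format, with durations given in nanoseconds.
#
# example_ollama_response = {
#     "model": "llama3",
#     "message": {"role": "assistant", "content": "Hello!"},
#     "done": True,
#     "total_duration": 2_000_000_000,
#     "load_duration": 100_000_000,
#     "prompt_eval_count": 12,
#     "prompt_eval_duration": 200_000_000,
#     "eval_count": 40,
#     "eval_duration": 1_500_000_000,
# }
# openai_response = convert_response_ollama_to_openai(example_ollama_response)
# # -> an OpenAI-style chat completion dict carrying the usage block built above.

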
async def convert_streaming_response_ollama_to_openai(ollama_streaming_response):
    """Re-emit a streaming Ollama response as OpenAI-style SSE chat completion chunks."""
    async for data in ollama_streaming_response.body_iterator:
        data = json.loads(data)

        model = data.get("model", "ollama")
        message_content = data.get("message", {}).get("content", "")
        tool_calls = data.get("message", {}).get("tool_calls", None)
        openai_tool_calls = None

        if tool_calls:
            # Map Ollama tool calls onto the OpenAI tool_calls schema.
            openai_tool_calls = []
            for tool_call in tool_calls:
                openai_tool_call = {
                    "index": tool_call.get("index", 0),
                    "id": tool_call.get("id", f"call_{str(uuid4())}"),
                    "type": "function",
                    "function": {
                        "name": tool_call.get("function", {}).get("name", ""),
                        "arguments": f"{tool_call.get('function', {}).get('arguments', {})}",
                    },
                }
                openai_tool_calls.append(openai_tool_call)

        done = data.get("done", False)

        usage = None
        if done:
            # The final chunk carries the usage statistics; the fields mirror the
            # non-streaming converter above.
            usage = {
                "response_token/s": (
                    round(
                        (
                            (
                                data.get("eval_count", 0)
                                / ((data.get("eval_duration", 0) / 10_000_000))
                            )
                            * 100
                        ),
                        2,
                    )
                    if data.get("eval_duration", 0) > 0
                    else "N/A"
                ),
                "prompt_token/s": (
                    round(
                        (
                            (
                                data.get("prompt_eval_count", 0)
                                / ((data.get("prompt_eval_duration", 0) / 10_000_000))
                            )
                            * 100
                        ),
                        2,
                    )
                    if data.get("prompt_eval_duration", 0) > 0
                    else "N/A"
                ),
                "total_duration": data.get("total_duration", 0),
                "load_duration": data.get("load_duration", 0),
                "prompt_eval_count": data.get("prompt_eval_count", 0),
                "prompt_eval_duration": data.get("prompt_eval_duration", 0),
                "eval_count": data.get("eval_count", 0),
                "eval_duration": data.get("eval_duration", 0),
                "approximate_total": (
                    lambda s: f"{s // 3600}h{(s % 3600) // 60}m{s % 60}s"
                )((data.get("total_duration", 0) or 0) // 1_000_000_000),
            }

        data = openai_chat_chunk_message_template(
            model, message_content if not done else None, openai_tool_calls, usage
        )

        line = f"data: {json.dumps(data)}\n\n"
        yield line

    yield "data: [DONE]\n\n"
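
# Usage sketch (illustrative only): how this generator is typically exposed as an
# SSE endpoint. `upstream_response` is assumed to be a Starlette/FastAPI
# StreamingResponse whose body_iterator yields Ollama's newline-delimited JSON
# chunks; the wrapper function and its name are hypothetical.
#
# from fastapi.responses import StreamingResponse
#
# def to_openai_sse(upstream_response):
#     return StreamingResponse(
#         convert_streaming_response_ollama_to_openai(upstream_response),
#         media_type="text/event-stream",
#     )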