# response.py
import json
from uuid import uuid4
from open_webui.utils.misc import (
    openai_chat_chunk_message_template,
    openai_chat_completion_message_template,
)
  7. def convert_ollama_tool_call_to_openai(tool_calls: dict) -> dict:
  8. openai_tool_calls = []
  9. for tool_call in tool_calls:
  10. openai_tool_call = {
  11. "index": tool_call.get("index", 0),
  12. "id": tool_call.get("id", f"call_{str(uuid4())}"),
  13. "type": "function",
  14. "function": {
  15. "name": tool_call.get("function", {}).get("name", ""),
  16. "arguments": json.dumps(
  17. tool_call.get("function", {}).get("arguments", {})
  18. ),
  19. },
  20. }
  21. openai_tool_calls.append(openai_tool_call)
  22. return openai_tool_calls
  23. def convert_response_ollama_to_openai(ollama_response: dict) -> dict:
  24. model = ollama_response.get("model", "ollama")
  25. message_content = ollama_response.get("message", {}).get("content", "")
  26. tool_calls = ollama_response.get("message", {}).get("tool_calls", None)
  27. openai_tool_calls = None
  28. if tool_calls:
  29. openai_tool_calls = convert_ollama_tool_call_to_openai(tool_calls)
  30. data = ollama_response
  31. usage = {
  32. "response_token/s": (
  33. round(
  34. (
  35. (
  36. data.get("eval_count", 0)
  37. / ((data.get("eval_duration", 0) / 10_000_000))
  38. )
  39. * 100
  40. ),
  41. 2,
  42. )
  43. if data.get("eval_duration", 0) > 0
  44. else "N/A"
  45. ),
  46. "prompt_token/s": (
  47. round(
  48. (
  49. (
  50. data.get("prompt_eval_count", 0)
  51. / ((data.get("prompt_eval_duration", 0) / 10_000_000))
  52. )
  53. * 100
  54. ),
  55. 2,
  56. )
  57. if data.get("prompt_eval_duration", 0) > 0
  58. else "N/A"
  59. ),
  60. "total_duration": data.get("total_duration", 0),
  61. "load_duration": data.get("load_duration", 0),
  62. "prompt_eval_count": data.get("prompt_eval_count", 0),
  63. "prompt_eval_duration": data.get("prompt_eval_duration", 0),
  64. "eval_count": data.get("eval_count", 0),
  65. "eval_duration": data.get("eval_duration", 0),
  66. "approximate_total": (lambda s: f"{s // 3600}h{(s % 3600) // 60}m{s % 60}s")(
  67. (data.get("total_duration", 0) or 0) // 1_000_000_000
  68. ),
  69. }
  70. response = openai_chat_completion_message_template(
  71. model, message_content, openai_tool_calls, usage
  72. )
  73. return response
  74. async def convert_streaming_response_ollama_to_openai(ollama_streaming_response):
  75. async for data in ollama_streaming_response.body_iterator:
  76. data = json.loads(data)
  77. model = data.get("model", "ollama")
  78. message_content = data.get("message", {}).get("content", "")
  79. tool_calls = data.get("message", {}).get("tool_calls", None)
  80. openai_tool_calls = None
  81. if tool_calls:
  82. openai_tool_calls = convert_ollama_tool_call_to_openai(tool_calls)
  83. done = data.get("done", False)
  84. usage = None
  85. if done:
  86. usage = {
  87. "response_token/s": (
  88. round(
  89. (
  90. (
  91. data.get("eval_count", 0)
  92. / ((data.get("eval_duration", 0) / 10_000_000))
  93. )
  94. * 100
  95. ),
  96. 2,
  97. )
  98. if data.get("eval_duration", 0) > 0
  99. else "N/A"
  100. ),
  101. "prompt_token/s": (
  102. round(
  103. (
  104. (
  105. data.get("prompt_eval_count", 0)
  106. / ((data.get("prompt_eval_duration", 0) / 10_000_000))
  107. )
  108. * 100
  109. ),
  110. 2,
  111. )
  112. if data.get("prompt_eval_duration", 0) > 0
  113. else "N/A"
  114. ),
  115. "total_duration": data.get("total_duration", 0),
  116. "load_duration": data.get("load_duration", 0),
  117. "prompt_eval_count": data.get("prompt_eval_count", 0),
  118. "prompt_eval_duration": data.get("prompt_eval_duration", 0),
  119. "eval_count": data.get("eval_count", 0),
  120. "eval_duration": data.get("eval_duration", 0),
  121. "approximate_total": (
  122. lambda s: f"{s // 3600}h{(s % 3600) // 60}m{s % 60}s"
  123. )((data.get("total_duration", 0) or 0) // 1_000_000_000),
  124. }
  125. data = openai_chat_chunk_message_template(
  126. model, message_content if not done else None, openai_tool_calls, usage
  127. )
  128. line = f"data: {json.dumps(data)}\n\n"
  129. yield line
  130. yield "data: [DONE]\n\n"