response.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. import json
  2. from uuid import uuid4
  3. from open_webui.utils.misc import (
  4. openai_chat_chunk_message_template,
  5. openai_chat_completion_message_template,
  6. )
  def convert_ollama_tool_call_to_openai(tool_calls: list) -> list:
      """Convert Ollama tool-call entries into OpenAI-style tool-call dicts.

      Args:
          tool_calls: Iterable of tool-call dicts. Each may carry ``index``,
              ``id`` and a ``function`` dict with ``name`` and ``arguments``.
              ``None`` is treated as an empty list.

      Returns:
          A list with one dict per input entry, each holding ``index``, ``id``,
          ``type`` (always ``"function"``) and ``function`` (``name`` plus the
          arguments serialized as a JSON string).
      """
      openai_tool_calls = []
      for position, tool_call in enumerate(tool_calls or []):
          function = tool_call.get("function") or {}

          # Fall back to the entry's position rather than a constant 0 so that
          # several calls without an explicit index do not all share index 0.
          index = tool_call.get("index")
          if index is None:
              index = position

          # Only mint a fresh id when the entry does not provide a usable one.
          call_id = tool_call.get("id") or f"call_{uuid4()}"

          openai_tool_calls.append(
              {
                  "index": index,
                  "id": call_id,
                  "type": "function",
                  "function": {
                      "name": function.get("name", ""),
                      # The arguments are emitted as a JSON-encoded string.
                      "arguments": json.dumps(function.get("arguments", {})),
                  },
              }
          )
      return openai_tool_calls
  def _ollama_usage_to_openai(data: dict) -> dict:
      """Build the usage/timing dict from an Ollama response payload.

      Shared by the streaming and non-streaming converters so both report the
      same fields.

      Args:
          data: Ollama response payload; the count and duration fields read
              below are all optional.

      Returns:
          A dict with the two tokens-per-second rates (or ``"N/A"`` when the
          matching duration is missing, null or not positive), the raw count
          and duration fields passed through, and ``approximate_total``
          formatted as ``"<h>h<m>m<s>s"``.
      """

      def tokens_per_second(count_key: str, duration_key: str):
          # `or 0` also covers a key that is present but null, matching how
          # total_duration is guarded below.
          duration = data.get(duration_key) or 0
          if duration > 0:
              count = data.get(count_key) or 0
              # Dividing by 10_000_000 and multiplying by 100 scales the ratio
              # by 1e9, i.e. the duration is treated as nanoseconds.
              return round((count / (duration / 10_000_000)) * 100, 2)
          return "N/A"

      total_seconds = (data.get("total_duration", 0) or 0) // 1_000_000_000
      hours, remainder = divmod(total_seconds, 3600)
      minutes, seconds = divmod(remainder, 60)

      return {
          "response_token/s": tokens_per_second("eval_count", "eval_duration"),
          "prompt_token/s": tokens_per_second(
              "prompt_eval_count", "prompt_eval_duration"
          ),
          "total_duration": data.get("total_duration", 0),
          "load_duration": data.get("load_duration", 0),
          "prompt_eval_count": data.get("prompt_eval_count", 0),
          "prompt_eval_duration": data.get("prompt_eval_duration", 0),
          "eval_count": data.get("eval_count", 0),
          "eval_duration": data.get("eval_duration", 0),
          "approximate_total": f"{hours}h{minutes}m{seconds}s",
      }


  def convert_response_ollama_to_openai(ollama_response: dict) -> dict:
      """Convert a complete (non-streaming) Ollama chat response.

      Args:
          ollama_response: Ollama response payload. ``model`` defaults to
              ``"ollama"`` and ``message.content`` to ``""`` when absent.

      Returns:
          The result of ``openai_chat_completion_message_template`` called with
          the model, message content, converted tool calls (``None`` when the
          message has none) and the usage dict.
      """
      model = ollama_response.get("model", "ollama")
      # `or {}` tolerates a message key that is present but null.
      message = ollama_response.get("message") or {}
      message_content = message.get("content", "")

      tool_calls = message.get("tool_calls")
      openai_tool_calls = (
          convert_ollama_tool_call_to_openai(tool_calls) if tool_calls else None
      )

      usage = _ollama_usage_to_openai(ollama_response)
      return openai_chat_completion_message_template(
          model, message_content, openai_tool_calls, usage
      )


  async def convert_streaming_response_ollama_to_openai(ollama_streaming_response):
      """Re-emit an Ollama streaming response as OpenAI-style SSE lines.

      Args:
          ollama_streaming_response: Object whose ``body_iterator`` is an async
              iterable of JSON-encoded chunks.

      Yields:
          One ``"data: <json>\\n\\n"`` string per non-blank chunk, built with
          ``openai_chat_chunk_message_template``, followed by a final
          ``"data: [DONE]\\n\\n"``.
      """
      async for raw_chunk in ollama_streaming_response.body_iterator:
          # Skip blank chunks instead of letting json.loads raise on them.
          if not raw_chunk.strip():
              continue

          payload = json.loads(raw_chunk)
          model = payload.get("model", "ollama")
          # `or {}` tolerates a message key that is present but null.
          message = payload.get("message") or {}
          message_content = message.get("content", "")

          tool_calls = message.get("tool_calls")
          openai_tool_calls = (
              convert_ollama_tool_call_to_openai(tool_calls) if tool_calls else None
          )

          done = payload.get("done", False)
          # Usage is attached only to the chunk flagged as done.
          usage = _ollama_usage_to_openai(payload) if done else None

          # The done chunk is sent without message content.
          chunk = openai_chat_chunk_message_template(
              model, message_content if not done else None, openai_tool_calls, usage
          )
          yield f"data: {json.dumps(chunk)}\n\n"

      yield "data: [DONE]\n\n"