# response.py

import json
from uuid import uuid4

from open_webui.utils.misc import (
    openai_chat_chunk_message_template,
    openai_chat_completion_message_template,
)

def convert_ollama_tool_call_to_openai(tool_calls: list) -> list:
    """Convert a list of Ollama tool calls to the OpenAI tool-call format."""
    openai_tool_calls = []
    for tool_call in tool_calls:
        openai_tool_call = {
            "index": tool_call.get("index", 0),
            "id": tool_call.get("id", f"call_{str(uuid4())}"),
            "type": "function",
            "function": {
                "name": tool_call.get("function", {}).get("name", ""),
                # OpenAI expects arguments as a JSON string, not a dict.
                "arguments": json.dumps(
                    tool_call.get("function", {}).get("arguments", {})
                ),
            },
        }
        openai_tool_calls.append(openai_tool_call)
    return openai_tool_calls
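

# Illustrative sketch: the payload below is a hypothetical Ollama-style tool
# call, showing the input shape this converter expects and the OpenAI-style
# list it returns; the function name and arguments are invented for the demo.
def _example_convert_tool_call():
    ollama_tool_calls = [
        {"function": {"name": "get_weather", "arguments": {"city": "Berlin"}}}
    ]
    converted = convert_ollama_tool_call_to_openai(ollama_tool_calls)
    # converted[0]["function"]["arguments"] is now the JSON string
    # '{"city": "Berlin"}', and a fresh "id" like "call_<uuid4>" was generated.
    return converted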

def convert_ollama_usage_to_openai(data: dict) -> dict:
    # Ollama reports durations in nanoseconds: dividing by 10_000_000 gives
    # hundredths of a second, so multiplying the rate by 100 yields tokens/s.
    return {
        "response_token/s": (
            round(
                data.get("eval_count", 0)
                / (data.get("eval_duration", 0) / 10_000_000)
                * 100,
                2,
            )
            if data.get("eval_duration", 0) > 0
            else "N/A"
        ),
        "prompt_token/s": (
            round(
                data.get("prompt_eval_count", 0)
                / (data.get("prompt_eval_duration", 0) / 10_000_000)
                * 100,
                2,
            )
            if data.get("prompt_eval_duration", 0) > 0
            else "N/A"
        ),
        "total_duration": data.get("total_duration", 0),
        "load_duration": data.get("load_duration", 0),
        "prompt_eval_count": data.get("prompt_eval_count", 0),
        "prompt_tokens": int(
            data.get("prompt_eval_count", 0)
        ),  # This is the OpenAI-compatible key
        "prompt_eval_duration": data.get("prompt_eval_duration", 0),
        "eval_count": data.get("eval_count", 0),
        "completion_tokens": int(
            data.get("eval_count", 0)
        ),  # This is the OpenAI-compatible key
        "eval_duration": data.get("eval_duration", 0),
        # Human-readable wall-clock total, e.g. "0h0m3s".
        "approximate_total": (lambda s: f"{s // 3600}h{(s % 3600) // 60}m{s % 60}s")(
            (data.get("total_duration", 0) or 0) // 1_000_000_000
        ),
        "total_tokens": int(  # This is the OpenAI-compatible key
            data.get("prompt_eval_count", 0) + data.get("eval_count", 0)
        ),
        "completion_tokens_details": {  # This is the OpenAI-compatible key
            "reasoning_tokens": 0,
            "accepted_prediction_tokens": 0,
            "rejected_prediction_tokens": 0,
        },
    }
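

# Illustrative sketch: usage conversion for a hypothetical final Ollama chunk.
# Since durations are nanoseconds, 10 tokens over 2_000_000_000 ns (2 s) come
# out to 5.0 tokens/s; all values here are invented for the demo.
def _example_convert_usage():
    data = {
        "prompt_eval_count": 5,
        "prompt_eval_duration": 1_000_000_000,  # 1 s -> 5.0 prompt tokens/s
        "eval_count": 10,
        "eval_duration": 2_000_000_000,  # 2 s -> 5.0 response tokens/s
        "total_duration": 3_000_000_000,  # -> approximate_total "0h0m3s"
    }
    usage = convert_ollama_usage_to_openai(data)
    # usage["prompt_tokens"] == 5, usage["completion_tokens"] == 10,
    # usage["total_tokens"] == 15, usage["response_token/s"] == 5.0
    return usage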

def convert_response_ollama_to_openai(ollama_response: dict) -> dict:
    model = ollama_response.get("model", "ollama")
    message_content = ollama_response.get("message", {}).get("content", "")
    tool_calls = ollama_response.get("message", {}).get("tool_calls", None)

    openai_tool_calls = None
    if tool_calls:
        openai_tool_calls = convert_ollama_tool_call_to_openai(tool_calls)

    usage = convert_ollama_usage_to_openai(ollama_response)

    return openai_chat_completion_message_template(
        model, message_content, openai_tool_calls, usage
    )
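

# Illustrative sketch: a hypothetical non-streaming Ollama /api/chat response
# run through the converter; the exact output shape is determined by
# openai_chat_completion_message_template in open_webui.utils.misc.
def _example_convert_response():
    ollama_response = {
        "model": "llama3",
        "message": {"content": "Hello!"},
        "done": True,
        "prompt_eval_count": 5,
        "prompt_eval_duration": 1_000_000_000,
        "eval_count": 10,
        "eval_duration": 2_000_000_000,
    }
    return convert_response_ollama_to_openai(ollama_response)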

async def convert_streaming_response_ollama_to_openai(ollama_streaming_response):
    async for data in ollama_streaming_response.body_iterator:
        data = json.loads(data)

        model = data.get("model", "ollama")
        message_content = data.get("message", {}).get("content", "")
        tool_calls = data.get("message", {}).get("tool_calls", None)

        openai_tool_calls = None
        if tool_calls:
            openai_tool_calls = convert_ollama_tool_call_to_openai(tool_calls)

        done = data.get("done", False)

        # Usage statistics only arrive on the final ("done") chunk.
        usage = None
        if done:
            usage = convert_ollama_usage_to_openai(data)

        data = openai_chat_chunk_message_template(
            model, message_content if not done else None, openai_tool_calls, usage
        )

        # Emit each chunk as a Server-Sent Events "data:" line.
        line = f"data: {json.dumps(data)}\n\n"
        yield line

    yield "data: [DONE]\n\n"
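

# Illustrative sketch: _FakeStreamingResponse is a hypothetical stand-in for
# the FastAPI/Starlette streaming response this converter receives upstream;
# it only needs to expose the body_iterator attribute read above.
class _FakeStreamingResponse:
    def __init__(self, chunks):
        self._chunks = chunks

    @property
    def body_iterator(self):
        async def _iterate():
            for chunk in self._chunks:
                yield chunk

        return _iterate()


async def _example_convert_streaming():
    # Two invented Ollama chunks: one content delta, then the final chunk
    # carrying usage stats.
    chunks = [
        json.dumps({"model": "llama3", "message": {"content": "Hi"}, "done": False}),
        json.dumps(
            {
                "model": "llama3",
                "message": {"content": ""},
                "done": True,
                "eval_count": 2,
                "eval_duration": 1_000_000_000,
            }
        ),
    ]
    async for line in convert_streaming_response_ollama_to_openai(
        _FakeStreamingResponse(chunks)
    ):
        print(line, end="")  # "data: {...}\n\n" chunks, then "data: [DONE]\n\n"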