response.py

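"""Convert Ollama chat responses to the OpenAI chat-completion format.

Covers both the one-shot completion shape and the streaming chunk shape
emitted as Server-Sent Events ("data: ..." lines).
"""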

import json

from open_webui.utils.misc import (
    openai_chat_chunk_message_template,
    openai_chat_completion_message_template,
)


def convert_response_ollama_to_openai(ollama_response: dict) -> dict:
    model = ollama_response.get("model", "ollama")
    message_content = ollama_response.get("message", {}).get("content", "")

    response = openai_chat_completion_message_template(model, message_content)
    return response
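
# Example (hypothetical payload): an Ollama /api/chat response such as
#     {"model": "llama3", "message": {"role": "assistant", "content": "Hi."}}
# is mapped by the template helper onto an OpenAI-style chat.completion
# object whose first choice carries "Hi." as the assistant message.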


async def convert_streaming_response_ollama_to_openai(ollama_streaming_response):
    async for data in ollama_streaming_response.body_iterator:
        data = json.loads(data)

        model = data.get("model", "ollama")
        message_content = data.get("message", {}).get("content", "")
        done = data.get("done", False)

        usage = None
        if done:
            # Ollama reports all durations in nanoseconds; convert them to
            # tokens/second and milliseconds for the usage block.
            usage = {
                "response_token/s": (
                    round(
                        data.get("eval_count", 0)
                        / (data.get("eval_duration", 0) / 1_000_000_000),
                        2,
                    )
                    if data.get("eval_duration", 0) > 0
                    else "N/A"
                ),
                "prompt_token/s": (
                    round(
                        data.get("prompt_eval_count", 0)
                        / (data.get("prompt_eval_duration", 0) / 1_000_000_000),
                        2,
                    )
                    if data.get("prompt_eval_duration", 0) > 0
                    else "N/A"
                ),
                # Durations below are converted from nanoseconds to milliseconds.
                "total_duration": round(data.get("total_duration", 0) / 1_000_000, 2),
                "load_duration": round(data.get("load_duration", 0) / 1_000_000, 2),
                "prompt_eval_count": data.get("prompt_eval_count", 0),
                "prompt_eval_duration": round(
                    data.get("prompt_eval_duration", 0) / 1_000_000, 2
                ),
                "eval_count": data.get("eval_count", 0),
                "eval_duration": round(data.get("eval_duration", 0) / 1_000_000, 2),
                # Human-readable wall-clock total, e.g. "0h0m12s".
                "approximate_total": (
                    lambda s: f"{s // 3600}h{(s % 3600) // 60}m{s % 60}s"
                )((data.get("total_duration", 0) or 0) // 1_000_000_000),
            }

        # Re-emit the chunk in OpenAI chat-completion-chunk format; the final
        # chunk carries no content, only the usage block.
        data = openai_chat_chunk_message_template(
            model, message_content if not done else None, usage
        )

        line = f"data: {json.dumps(data)}\n\n"
        yield line

    yield "data: [DONE]\n\n"
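
# Usage sketch (assumed FastAPI context; not part of this module): the async
# generator above can back a StreamingResponse so that Ollama chunks are
# re-served as OpenAI-style SSE:
#
#     from fastapi.responses import StreamingResponse
#
#     return StreamingResponse(
#         convert_streaming_response_ollama_to_openai(ollama_response),
#         media_type="text/event-stream",
#     )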