response.py

import json

from open_webui.utils.misc import (
    openai_chat_chunk_message_template,
    openai_chat_completion_message_template,
)


def convert_response_ollama_to_openai(ollama_response: dict) -> dict:
    """Map a single (non-streaming) Ollama chat response to the OpenAI
    chat completion format."""
    model = ollama_response.get("model", "ollama")
    message_content = ollama_response.get("message", {}).get("content", "")

    response = openai_chat_completion_message_template(model, message_content)
    return response
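
# Reference sketch (an assumption, not verbatim from open_webui.utils.misc):
# the template helpers are assumed to emit standard OpenAI chat payloads,
# roughly:
#
#   convert_response_ollama_to_openai(
#       {"model": "llama3", "message": {"role": "assistant", "content": "Hi!"}}
#   )
#   # -> {"object": "chat.completion", "model": "llama3",
#   #     "choices": [{"message": {"role": "assistant", "content": "Hi!"},
#   #                  ...}],
#   #     ...}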


async def convert_streaming_response_ollama_to_openai(ollama_streaming_response):
    """Re-emit an Ollama streaming chat response as OpenAI-style SSE chunks."""
    async for data in ollama_streaming_response.body_iterator:
        data = json.loads(data)

        model = data.get("model", "ollama")
        message_content = data.get("message", {}).get("content", "")
        done = data.get("done", False)

        usage = None
        if done:
            # Ollama reports durations in nanoseconds; convert to seconds
            # before computing tokens per second.
            usage = {
                "response_token/s": (
                    round(
                        data.get("eval_count", 0)
                        / (data.get("eval_duration", 0) / 1_000_000_000),
                        2,
                    )
                    if data.get("eval_duration", 0) > 0
                    else "N/A"
                ),
                "prompt_token/s": (
                    round(
                        data.get("prompt_eval_count", 0)
                        / (data.get("prompt_eval_duration", 0) / 1_000_000_000),
                        2,
                    )
                    if data.get("prompt_eval_duration", 0) > 0
                    else "N/A"
                ),
  48. "total_duration": data.get("total_duration", 0),
  49. "load_duration": data.get("load_duration", 0),
  50. "prompt_eval_count": data.get("prompt_eval_count", 0),
  51. "prompt_eval_duration": data.get("prompt_eval_duration", 0),
  52. "eval_count": data.get("eval_count", 0),
  53. "eval_duration": data.get("eval_duration", 0),
  54. "approximate_total": (
  55. lambda s: f"{s // 3600}h{(s % 3600) // 60}m{s % 60}s"
  56. )((data.get("total_duration", 0) or 0) // 1_000_000_000),
  57. }

        # Content chunks carry the delta text; the final chunk carries
        # usage only.
        data = openai_chat_chunk_message_template(
            model, message_content if not done else None, usage
        )

        line = f"data: {json.dumps(data)}\n\n"
        yield line

    # Terminate the SSE stream once Ollama stops sending chunks.
    yield "data: [DONE]\n\n"
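
# Usage sketch (a minimal, hypothetical wiring; `forward_chat_to_ollama` and
# the route below are assumptions, not part of this module). The streaming
# converter expects an object with a `body_iterator` that yields one
# JSON-encoded Ollama chunk at a time, e.g. a Starlette StreamingResponse
# proxying Ollama's /api/chat:
#
#   from fastapi import Request
#   from fastapi.responses import StreamingResponse
#
#   @app.post("/v1/chat/completions")
#   async def chat_completions(request: Request):
#       ollama_response = await forward_chat_to_ollama(request)
#       return StreamingResponse(
#           convert_streaming_response_ollama_to_openai(ollama_response),
#           media_type="text/event-stream",
#       )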