pdf_generator.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
import html
import logging
from datetime import datetime
from io import BytesIO
from pathlib import Path
from typing import Dict, Any, List

from markdown import markdown
from starlette.responses import Response
from xhtml2pdf import pisa

from open_webui.apps.webui.models.chats import ChatTitleMessagesForm
  9. class PDFGenerator:
  10. """
  11. Description:
  12. The `PDFGenerator` class is designed to create PDF documents from chat messages.
  13. The process involves transforming markdown content into HTML and then into a PDF format,
  14. which can be easily returned as a response to the routes.
  15. It depends on xhtml2pdf for converting HTML to PDF (more details at https://github.com/xhtml2pdf/xhtml2pdf).
  16. I found xhtml2pdf issues when rendering list html tag, see https://github.com/xhtml2pdf/xhtml2pdf/issues/550
  17. and https://github.com/xhtml2pdf/xhtml2pdf/issues/756.
  18. Attributes:
  19. - `form_data`: An instance of `ChatTitleMessagesForm` containing title and messages.
  20. """
  21. def __init__(self, form_data: ChatTitleMessagesForm):
  22. self.html_body = None
  23. self.messages_html = None
  24. self.form_data = form_data
  25. self.css_style_file = Path("./backend/open_webui/static/assets/pdf-style.css")
  26. def build_html_message(self, message: Dict[str, Any]) -> str:
  27. """Build HTML for a single message."""
  28. role = message.get("role", "user")
  29. content = message.get("content", "")
  30. timestamp = message.get('timestamp')
  31. model = message.get('model') if role == 'assistant' else ''
  32. date_str = self.format_timestamp(timestamp) if timestamp else ''
  33. # extends pymdownx extension to convert markdown to html.
  34. # - https://facelessuser.github.io/pymdown-extensions/usage_notes/
  35. html_content = markdown(content, extensions=['pymdownx.extra'])
  36. html_message = f"""
  37. <div class="message">
  38. <small> {date_str} </small>
  39. <div>
  40. <h2>
  41. <strong>{role.title()}</strong>
  42. <small class="text-muted">{model}</small>
  43. </h2>
  44. </div>
  45. <div class="markdown-section">
  46. {html_content}
  47. </div>
  48. </div>
  49. """
  50. return html_message
  51. def create_pdf_from_html(self) -> bytes:
  52. """Convert HTML content to PDF and return the bytes."""
  53. pdf_buffer = BytesIO()
  54. pisa_status = pisa.CreatePDF(src=self.html_body, dest=pdf_buffer)
  55. if pisa_status.err:
  56. raise RuntimeError("Error generating PDF")
  57. return pdf_buffer.getvalue()
  58. def format_timestamp(self, timestamp: float) -> str:
  59. """Convert a UNIX timestamp to a formatted date string."""
  60. try:
  61. date_time = datetime.fromtimestamp(timestamp)
  62. return date_time.strftime("%Y-%m-%d, %H:%M:%S")
  63. except (ValueError, TypeError) as e:
  64. # Log the error if necessary
  65. return ''
  66. def generate_chat_pdf(self) -> Response:
  67. """
  68. Generate a PDF from chat messages.
  69. Returns:
  70. A FastAPI Response with the generated PDF or an error message.
  71. """
  72. try:
  73. # Build HTML messages
  74. messages_html_list: List[str] = [self.build_html_message(msg) for msg in self.form_data.messages]
  75. self.messages_html = '<div>' + ''.join(messages_html_list) + '</div>'
  76. # Generate full HTML body
  77. self.html_body = self.generate_html_body()
  78. # Create PDF
  79. pdf_bytes = self.create_pdf_from_html()
  80. # Return PDF as response
  81. return Response(
  82. content=pdf_bytes,
  83. media_type="application/pdf",
  84. headers={"Content-Disposition": "attachment;filename=chat.pdf"},
  85. )
  86. except RuntimeError as pdf_error:
  87. # Handle PDF generation errors
  88. return Response(content=str(pdf_error), status_code=500)
  89. except Exception as e:
  90. # Handle other unexpected errors
  91. return Response(content="An unexpected error occurred.", status_code=500)
  92. def generate_html_body(self) -> str:
  93. """Generate the full HTML body for the PDF."""
  94. return f"""
  95. <html>
  96. <head>
  97. <meta charset="UTF-8">
  98. <meta name="viewport" content="width=device-width, initial-scale=1.0">
  99. <link rel="stylesheet" href="{self.css_style_file.as_posix()}">
  100. </head>
  101. <body>
  102. <div class="container">
  103. <div class="text-center">
  104. <h1>{self.form_data.title}</h1>
  105. </div>
  106. <div>
  107. {self.messages_html}
  108. </div>
  109. </div>
  110. </body>
  111. </html>
  112. """