# pdf_generator.py
import html
import site
from datetime import datetime
from io import BytesIO
from pathlib import Path
from typing import Dict, Any, List

from fpdf import FPDF
from markdown import markdown
from xhtml2pdf import pisa

from open_webui.env import STATIC_DIR, FONTS_DIR
from open_webui.apps.webui.models.chats import ChatTitleMessagesForm


  11. class PDFGenerator:
  12. """
  13. Description:
  14. The `PDFGenerator` class is designed to create PDF documents from chat messages.
  15. The process involves transforming markdown content into HTML and then into a PDF format,
  16. which can be easily returned as a response to the routes.
  17. It depends on xhtml2pdf for converting HTML to PDF (more details at https://github.com/xhtml2pdf/xhtml2pdf).
  18. I found xhtml2pdf issues when rendering list html tag, see https://github.com/xhtml2pdf/xhtml2pdf/issues/550
  19. and https://github.com/xhtml2pdf/xhtml2pdf/issues/756.
  20. Attributes:
  21. - `form_data`: An instance of `ChatTitleMessagesForm` containing title and messages.
  22. """
  23. def __init__(self, form_data: ChatTitleMessagesForm):
  24. self.html_body = None
  25. self.messages_html = None
  26. self.form_data = form_data
  27. self.css = Path(STATIC_DIR / "assets" / "pdf-style.css").read_text()
  28. def format_timestamp(self, timestamp: float) -> str:
  29. """Convert a UNIX timestamp to a formatted date string."""
  30. try:
  31. date_time = datetime.fromtimestamp(timestamp)
  32. return date_time.strftime("%Y-%m-%d, %H:%M:%S")
  33. except (ValueError, TypeError) as e:
  34. # Log the error if necessary
  35. return ""
  36. def _build_html_message(self, message: Dict[str, Any]) -> str:
  37. """Build HTML for a single message."""
  38. role = message.get("role", "user")
  39. content = message.get("content", "")
  40. timestamp = message.get("timestamp")
  41. model = message.get("model") if role == "assistant" else ""
  42. date_str = self.format_timestamp(timestamp) if timestamp else ""
  43. # extends pymdownx extension to convert markdown to html.
  44. # - https://facelessuser.github.io/pymdown-extensions/usage_notes/
  45. html_content = markdown(content, extensions=["pymdownx.extra"])
  46. html_message = f"""
  47. <div class="message">
  48. <small> {date_str} </small>
  49. <div>
  50. <h2>
  51. <strong>{role.title()}</strong>
  52. <small class="text-muted">{model}</small>
  53. </h2>
  54. </div>
  55. <div class="markdown-section">
  56. {html_content}
  57. </div>
  58. </div>
  59. """
  60. return html_message
  61. def _fetch_resources(self, uri: str, rel: str) -> str:
  62. print(str(STATIC_DIR / uri))
  63. return str(STATIC_DIR / uri)
  64. def _create_pdf_from_html(self) -> bytes:
  65. """Convert HTML content to PDF and return the bytes."""
  66. pdf_buffer = BytesIO()
  67. pisa_status = pisa.CreatePDF(
  68. src=self.html_body.encode("UTF-8"),
  69. dest=pdf_buffer,
  70. encoding="UTF-8",
  71. link_callback=self._fetch_resources,
  72. )
  73. if pisa_status.err:
  74. raise RuntimeError("Error generating PDF")
  75. return pdf_buffer.getvalue()
  76. def _generate_html_body(self) -> str:
  77. """Generate the full HTML body for the PDF."""
  78. return f"""
  79. <html>
  80. <head>
  81. <meta charset="UTF-8">
  82. <meta name="viewport" content="width=device-width, initial-scale=1.0">
  83. <style type="text/css">
  84. {self.css}
  85. </style>
  86. </head>
  87. <body>
  88. <div class="container">
  89. <div class="text-center">
  90. <h1>{self.form_data.title}</h1>
  91. </div>
  92. <div>
  93. {self.messages_html}
  94. </div>
  95. </div>
  96. </body>
  97. </html>
  98. """
  99. def generate_chat_pdf(self) -> bytes:
  100. """
  101. Generate a PDF from chat messages.
  102. """
  103. try:
  104. global FONTS_DIR
  105. pdf = FPDF()
  106. pdf.add_page()
  107. # When running using `pip install` the static directory is in the site packages.
  108. if not FONTS_DIR.exists():
  109. FONTS_DIR = Path(site.getsitepackages()[0]) / "static/fonts"
  110. # When running using `pip install -e .` the static directory is in the site packages.
  111. # This path only works if `open-webui serve` is run from the root of this project.
  112. if not FONTS_DIR.exists():
  113. FONTS_DIR = Path("./backend/static/fonts")
  114. pdf.add_font("NotoSans", "", f"{FONTS_DIR}/NotoSans-Regular.ttf")
  115. pdf.add_font("NotoSans", "b", f"{FONTS_DIR}/NotoSans-Bold.ttf")
  116. pdf.add_font("NotoSans", "i", f"{FONTS_DIR}/NotoSans-Italic.ttf")
  117. pdf.add_font("NotoSansKR", "", f"{FONTS_DIR}/NotoSansKR-Regular.ttf")
  118. pdf.add_font("NotoSansJP", "", f"{FONTS_DIR}/NotoSansJP-Regular.ttf")
  119. pdf.add_font("NotoSansSC", "", f"{FONTS_DIR}/NotoSansSC-Regular.ttf")
  120. pdf.set_font("NotoSans", size=12)
  121. pdf.set_fallback_fonts(["NotoSansKR", "NotoSansJP", "NotoSansSC"])
  122. pdf.set_auto_page_break(auto=True, margin=15)
  123. # Adjust the effective page width for multi_cell
  124. effective_page_width = (
  125. pdf.w - 2 * pdf.l_margin - 10
  126. ) # Subtracted an additional 10 for extra padding
  127. # Add chat messages
  128. for message in self.form_data.messages:
  129. role = message["role"]
  130. content = message["content"]
  131. pdf.set_font("NotoSans", "B", size=14) # Bold for the role
  132. pdf.multi_cell(effective_page_width, 10, f"{role.upper()}", 0, "L")
  133. pdf.ln(1) # Extra space between messages
  134. pdf.set_font("NotoSans", size=10) # Regular for content
  135. pdf.multi_cell(effective_page_width, 6, content, 0, "L")
  136. pdf.ln(1.5) # Extra space between messages
  137. # Save the pdf with name .pdf
  138. pdf_bytes = pdf.output()
  139. return bytes(pdf_bytes)
  140. except Exception as e:
  141. raise e