pdf_generator.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
  1. from datetime import datetime
  2. from io import BytesIO
  3. from pathlib import Path
  4. from typing import Dict, Any, List
  5. from html import escape
  6. from markdown import markdown
  7. import site
  8. from fpdf import FPDF
  9. from open_webui.env import STATIC_DIR, FONTS_DIR
  10. from open_webui.models.chats import ChatTitleMessagesForm
  11. class PDFGenerator:
  12. """
  13. Description:
  14. The `PDFGenerator` class is designed to create PDF documents from chat messages.
  15. The process involves transforming markdown content into HTML and then into a PDF format
  16. Attributes:
  17. - `form_data`: An instance of `ChatTitleMessagesForm` containing title and messages.
  18. """
  19. def __init__(self, form_data: ChatTitleMessagesForm):
  20. self.html_body = None
  21. self.messages_html = None
  22. self.form_data = form_data
  23. self.css = Path(STATIC_DIR / "assets" / "pdf-style.css").read_text()
  24. def format_timestamp(self, timestamp: float) -> str:
  25. """Convert a UNIX timestamp to a formatted date string."""
  26. try:
  27. date_time = datetime.fromtimestamp(timestamp)
  28. return date_time.strftime("%Y-%m-%d, %H:%M:%S")
  29. except (ValueError, TypeError) as e:
  30. # Log the error if necessary
  31. return ""
  32. def _build_html_message(self, message: Dict[str, Any]) -> str:
  33. """Build HTML for a single message."""
  34. role = escape(message.get("role", "user"))
  35. content = escape(message.get("content", ""))
  36. timestamp = message.get("timestamp")
  37. model = escape(message.get("model") if role == "assistant" else "")
  38. date_str = escape(self.format_timestamp(timestamp) if timestamp else "")
  39. # extends pymdownx extension to convert markdown to html.
  40. # - https://facelessuser.github.io/pymdown-extensions/usage_notes/
  41. # html_content = markdown(content, extensions=["pymdownx.extra"])
  42. content = content.replace("\n", "<br/>")
  43. html_message = f"""
  44. <div>
  45. <div>
  46. <h4>
  47. <strong>{role.title()}</strong>
  48. <span style="font-size: 12px;">{model}</span>
  49. </h4>
  50. <div> {date_str} </div>
  51. </div>
  52. <br/>
  53. <br/>
  54. <div>
  55. {content}
  56. </div>
  57. </div>
  58. <br/>
  59. """
  60. return html_message
  61. def _generate_html_body(self) -> str:
  62. """Generate the full HTML body for the PDF."""
  63. escaped_title = escape(self.form_data.title)
  64. return f"""
  65. <html>
  66. <head>
  67. <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  68. </head>
  69. <body>
  70. <div>
  71. <div>
  72. <h2>{escaped_title}</h2>
  73. {self.messages_html}
  74. </div>
  75. </div>
  76. </body>
  77. </html>
  78. """
  79. def generate_chat_pdf(self) -> bytes:
  80. """
  81. Generate a PDF from chat messages.
  82. """
  83. try:
  84. global FONTS_DIR
  85. pdf = FPDF()
  86. pdf.add_page()
  87. # When running using `pip install` the static directory is in the site packages.
  88. if not FONTS_DIR.exists():
  89. FONTS_DIR = Path(site.getsitepackages()[0]) / "static/fonts"
  90. # When running using `pip install -e .` the static directory is in the site packages.
  91. # This path only works if `open-webui serve` is run from the root of this project.
  92. if not FONTS_DIR.exists():
  93. FONTS_DIR = Path(".") / "backend" / "static" / "fonts"
  94. pdf.add_font("NotoSans", "", f"{FONTS_DIR}/NotoSans-Regular.ttf")
  95. pdf.add_font("NotoSans", "b", f"{FONTS_DIR}/NotoSans-Bold.ttf")
  96. pdf.add_font("NotoSans", "i", f"{FONTS_DIR}/NotoSans-Italic.ttf")
  97. pdf.add_font("NotoSansKR", "", f"{FONTS_DIR}/NotoSansKR-Regular.ttf")
  98. pdf.add_font("NotoSansJP", "", f"{FONTS_DIR}/NotoSansJP-Regular.ttf")
  99. pdf.add_font("NotoSansSC", "", f"{FONTS_DIR}/NotoSansSC-Regular.ttf")
  100. pdf.add_font("Twemoji", "", f"{FONTS_DIR}/Twemoji.ttf")
  101. pdf.set_font("NotoSans", size=12)
  102. pdf.set_fallback_fonts(
  103. ["NotoSansKR", "NotoSansJP", "NotoSansSC", "Twemoji"]
  104. )
  105. pdf.set_auto_page_break(auto=True, margin=15)
  106. # Build HTML messages
  107. messages_html_list: List[str] = [
  108. self._build_html_message(msg) for msg in self.form_data.messages
  109. ]
  110. self.messages_html = "<div>" + "".join(messages_html_list) + "</div>"
  111. # Generate full HTML body
  112. self.html_body = self._generate_html_body()
  113. pdf.write_html(self.html_body)
  114. # Save the pdf with name .pdf
  115. pdf_bytes = pdf.output()
  116. return bytes(pdf_bytes)
  117. except Exception as e:
  118. raise e