123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146 |
- from datetime import datetime
- from io import BytesIO
- from pathlib import Path
- from typing import Dict, Any, List
- from html import escape
- from markdown import markdown
- import site
- from fpdf import FPDF
- from open_webui.env import STATIC_DIR, FONTS_DIR
- from open_webui.models.chats import ChatTitleMessagesForm
class PDFGenerator:
    """
    Description:
    The `PDFGenerator` class creates a PDF document from chat messages.
    Each message is rendered as HTML-escaped text (newlines become `<br/>`);
    markdown in the content is NOT converted. The assembled HTML document is
    then written into a PDF via `FPDF.write_html`.

    Attributes:
    - `form_data`: An instance of `ChatTitleMessagesForm` containing title and messages.
    - `messages_html`: HTML for all messages; set by `generate_chat_pdf`.
    - `html_body`: The full HTML document; set by `generate_chat_pdf`.
    - `css`: Text of `assets/pdf-style.css` under `STATIC_DIR`, read at construction.
    """

    # (family, style, file name) of every font registered on the PDF.
    _FONT_FILES = (
        ("NotoSans", "", "NotoSans-Regular.ttf"),
        ("NotoSans", "b", "NotoSans-Bold.ttf"),
        ("NotoSans", "i", "NotoSans-Italic.ttf"),
        ("NotoSansKR", "", "NotoSansKR-Regular.ttf"),
        ("NotoSansJP", "", "NotoSansJP-Regular.ttf"),
        ("NotoSansSC", "", "NotoSansSC-Regular.ttf"),
        ("Twemoji", "", "Twemoji.ttf"),
    )

    # Families tried, in order, for glyphs missing from the main font.
    _FALLBACK_FONTS = ("NotoSansKR", "NotoSansJP", "NotoSansSC", "Twemoji")

    def __init__(self, form_data: "ChatTitleMessagesForm"):
        """Store the form data and load the PDF stylesheet text.

        Raises whatever `Path.read_text` raises if the stylesheet is missing
        or unreadable.
        """
        self.html_body = None
        self.messages_html = None
        self.form_data = form_data
        self.css = Path(STATIC_DIR / "assets" / "pdf-style.css").read_text()

    def format_timestamp(self, timestamp: float) -> str:
        """Convert a UNIX timestamp to a `YYYY-MM-DD, HH:MM:SS` local-time string.

        Returns an empty string when the value cannot be converted (wrong
        type, NaN, or outside the range the platform supports).
        """
        try:
            return datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d, %H:%M:%S")
        except (ValueError, TypeError, OverflowError, OSError):
            # `fromtimestamp` raises OverflowError/OSError (not only
            # ValueError) for out-of-range values; treat all as "no date".
            return ""

    def _build_html_message(self, message: Dict[str, Any]) -> str:
        """Build the HTML fragment for a single message.

        Reads the `role`, `content`, `timestamp` and `model` keys. Missing or
        `None` values fall back to `"user"` for the role and to an empty
        string for content and model. The model is only shown for messages
        whose role is `"assistant"`. All values are HTML-escaped.
        """
        raw_role = message.get("role") or "user"
        # Decide on the raw (unescaped) role so escaping cannot affect the match.
        raw_model = (message.get("model") or "") if raw_role == "assistant" else ""
        timestamp = message.get("timestamp")

        # Title-case BEFORE escaping; doing it after would corrupt entities
        # (e.g. "&amp;" -> "&Amp;").
        role = escape(raw_role.title())
        model = escape(raw_model)
        date_str = escape(self.format_timestamp(timestamp) if timestamp else "")

        # Content is treated as plain text: escape it, then keep line breaks.
        content = escape(message.get("content") or "").replace("\n", "<br/>")

        html_message = f"""
            <div>
                <div>
                    <h4>
                        <strong>{role}</strong>
                        <span style="font-size: 12px;">{model}</span>
                    </h4>
                    <div> {date_str} </div>
                </div>
                <br/>
                <br/>
                <div>
                    {content}
                </div>
            </div>
            <br/>
        """
        return html_message

    def _generate_html_body(self) -> str:
        """Generate the full HTML document: escaped title plus `messages_html`."""
        escaped_title = escape(self.form_data.title)
        # Avoid rendering the literal text "None" if no messages were built yet.
        messages_html = self.messages_html or ""
        return f"""
            <html>
                <head>
                    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
                </head>
                <body>
                    <div>
                        <div>
                            <h2>{escaped_title}</h2>
                            {messages_html}
                        </div>
                    </div>
                </body>
            </html>
        """

    @staticmethod
    def _resolve_fonts_dir() -> Path:
        """Return the fonts directory, trying the known install locations in order."""
        fonts_dir = Path(FONTS_DIR)

        # When running using `pip install` the static directory is in the site packages.
        if not fonts_dir.exists():
            fonts_dir = Path(site.getsitepackages()[0]) / "static/fonts"

        # Fallback for `pip install -e .`: a path relative to the current
        # working directory. This only works if `open-webui serve` is run
        # from the root of this project.
        if not fonts_dir.exists():
            fonts_dir = Path(".") / "backend" / "static" / "fonts"

        return fonts_dir

    def generate_chat_pdf(self) -> bytes:
        """
        Generate a PDF from chat messages.

        Sets `messages_html` and `html_body` as a side effect and returns the
        rendered PDF as bytes. Any exception raised while loading fonts or
        rendering propagates to the caller unchanged.
        """
        pdf = FPDF()
        pdf.add_page()

        fonts_dir = self._resolve_fonts_dir()
        for family, style, file_name in self._FONT_FILES:
            pdf.add_font(family, style, str(fonts_dir / file_name))

        pdf.set_font("NotoSans", size=12)
        pdf.set_fallback_fonts(list(self._FALLBACK_FONTS))
        pdf.set_auto_page_break(auto=True, margin=15)

        # Build HTML messages
        messages_html_list: List[str] = [
            self._build_html_message(msg) for msg in self.form_data.messages
        ]
        self.messages_html = "<div>" + "".join(messages_html_list) + "</div>"

        # Generate full HTML body
        self.html_body = self._generate_html_body()
        pdf.write_html(self.html_body)

        # `output()` with no destination returns the document's bytes.
        return bytes(pdf.output())
|