Forráskód Böngészése

Merge pull request #6156 from noczero/add-pdf-generator

Feat: Enhance PDF Downloader to Export Chat
Timothy Jaeryang Baek 6 hónapja
szülő
commit
9c1820f785

+ 3 - 0
backend/open_webui/apps/webui/models/chats.py

@@ -61,6 +61,9 @@ class ChatModel(BaseModel):
 class ChatForm(BaseModel):
 class ChatForm(BaseModel):
     chat: dict
     chat: dict
 
 
+class ChatTitleMessagesForm(BaseModel):
+    title: str
+    messages: list[dict]
 
 
 class ChatTitleForm(BaseModel):
 class ChatTitleForm(BaseModel):
     title: str
     title: str

+ 6 - 56
backend/open_webui/apps/webui/routers/utils.py

@@ -1,16 +1,14 @@
-import site
-from pathlib import Path
-
 import black
 import black
 import markdown
 import markdown
+
+from open_webui.apps.webui.models.chats import ChatTitleMessagesForm
 from open_webui.config import DATA_DIR, ENABLE_ADMIN_EXPORT
 from open_webui.config import DATA_DIR, ENABLE_ADMIN_EXPORT
-from open_webui.env import FONTS_DIR
 from open_webui.constants import ERROR_MESSAGES
 from open_webui.constants import ERROR_MESSAGES
 from fastapi import APIRouter, Depends, HTTPException, Response, status
 from fastapi import APIRouter, Depends, HTTPException, Response, status
-from fpdf import FPDF
 from pydantic import BaseModel
 from pydantic import BaseModel
 from starlette.responses import FileResponse
 from starlette.responses import FileResponse
 from open_webui.utils.misc import get_gravatar_url
 from open_webui.utils.misc import get_gravatar_url
+from open_webui.utils.pdf_generator import PDFGenerator
 from open_webui.utils.utils import get_admin_user
 from open_webui.utils.utils import get_admin_user
 
 
 router = APIRouter()
 router = APIRouter()
@@ -56,58 +54,10 @@ class ChatForm(BaseModel):
 
 
 @router.post("/pdf")
 @router.post("/pdf")
 async def download_chat_as_pdf(
 async def download_chat_as_pdf(
-    form_data: ChatForm,
+    form_data: ChatTitleMessagesForm,
 ):
 ):
-    global FONTS_DIR
-
-    pdf = FPDF()
-    pdf.add_page()
-
-    # When running using `pip install` the static directory is in the site packages.
-    if not FONTS_DIR.exists():
-        FONTS_DIR = Path(site.getsitepackages()[0]) / "static/fonts"
-    # When running using `pip install -e .` the static directory is in the site packages.
-    # This path only works if `open-webui serve` is run from the root of this project.
-    if not FONTS_DIR.exists():
-        FONTS_DIR = Path("./backend/static/fonts")
-
-    pdf.add_font("NotoSans", "", f"{FONTS_DIR}/NotoSans-Regular.ttf")
-    pdf.add_font("NotoSans", "b", f"{FONTS_DIR}/NotoSans-Bold.ttf")
-    pdf.add_font("NotoSans", "i", f"{FONTS_DIR}/NotoSans-Italic.ttf")
-    pdf.add_font("NotoSansKR", "", f"{FONTS_DIR}/NotoSansKR-Regular.ttf")
-    pdf.add_font("NotoSansJP", "", f"{FONTS_DIR}/NotoSansJP-Regular.ttf")
-    pdf.add_font("NotoSansSC", "", f"{FONTS_DIR}/NotoSansSC-Regular.ttf")
-
-    pdf.set_font("NotoSans", size=12)
-    pdf.set_fallback_fonts(["NotoSansKR", "NotoSansJP", "NotoSansSC"])
-
-    pdf.set_auto_page_break(auto=True, margin=15)
-
-    # Adjust the effective page width for multi_cell
-    effective_page_width = (
-        pdf.w - 2 * pdf.l_margin - 10
-    )  # Subtracted an additional 10 for extra padding
-
-    # Add chat messages
-    for message in form_data.messages:
-        role = message["role"]
-        content = message["content"]
-        pdf.set_font("NotoSans", "B", size=14)  # Bold for the role
-        pdf.multi_cell(effective_page_width, 10, f"{role.upper()}", 0, "L")
-        pdf.ln(1)  # Extra space between messages
-
-        pdf.set_font("NotoSans", size=10)  # Regular for content
-        pdf.multi_cell(effective_page_width, 6, content, 0, "L")
-        pdf.ln(1.5)  # Extra space between messages
-
-    # Save the pdf with name .pdf
-    pdf_bytes = pdf.output()
-
-    return Response(
-        content=bytes(pdf_bytes),
-        media_type="application/pdf",
-        headers={"Content-Disposition": "attachment;filename=chat.pdf"},
-    )
+    response = PDFGenerator(form_data).generate_chat_pdf()
+    return response
 
 
 
 
 @router.get("/db/download")
 @router.get("/db/download")

+ 283 - 0
backend/open_webui/static/assets/pdf-style.css

@@ -0,0 +1,283 @@
+/* HTML and Body */
+html {
+    box-sizing: border-box;
+    font-size: 14px; /* Default font size */
+    line-height: 1.5;
+}
+
+*, *::before, *::after {
+    box-sizing: inherit;
+}
+
+body {
+    margin: 0;
+    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
+    color: #212529;
+    background-color: #fff;
+    width: auto;
+}
+
+/* Typography */
+h1, h2, h3, h4, h5, h6 {
+    font-weight: 500;
+    margin: 0;
+}
+
+h1 {
+    font-size: 2.5rem;
+}
+
+h2 {
+    font-size: 2rem;
+}
+
+h3 {
+    font-size: 1.75rem;
+}
+
+h4 {
+    font-size: 1.5rem;
+}
+
+h5 {
+    font-size: 1.25rem;
+}
+
+h6 {
+    font-size: 1rem;
+}
+
+p {
+    margin-top: 0;
+    margin-bottom: 1rem;
+}
+
+/* Grid System */
+.container {
+    width: 100%;
+    padding-right: 15px;
+    padding-left: 15px;
+    margin-right: auto;
+    margin-left: auto;
+}
+
+/* Utilities */
+.text-center {
+    text-align: center;
+}
+
+/* Additional Text Utilities */
+.text-muted {
+    color: #6c757d; /* Muted text color */
+}
+
+/* Small Text */
+small {
+    font-size: 80%; /* Smaller font size relative to the base */
+    color: #6c757d; /* Lighter text color for secondary information */
+    margin-bottom: 0;
+    margin-top: 0;
+}
+
+/* Strong Element Styles */
+strong {
+    font-weight: bolder; /* Ensures the text is bold */
+    color: inherit; /* Inherits the color from its parent element */
+}
+
+/* link */
+a {
+    color: #007bff;
+    text-decoration: none;
+    background-color: transparent;
+}
+
+a:hover {
+    color: #0056b3;
+    text-decoration: underline;
+}
+
+/* General styles for lists */
+ol, ul, li {
+    padding-left: 40px; /* Increase padding to move bullet points to the right */
+    margin-left: 20px; /* Indent lists from the left */
+}
+
+/* Ordered list styles */
+ol {
+    list-style-type: decimal; /* Use numbers for ordered lists */
+    margin-bottom: 10px; /* Space after each list */
+}
+
+
+ol li {
+    margin-bottom: 0.5rem; /* Space between ordered list items */
+}
+
+/* Unordered list styles */
+ul {
+    list-style-type: disc; /* Use bullets for unordered lists */
+    margin-bottom: 10px; /* Space after each list */
+}
+
+ul li {
+    margin-bottom: 0.5rem; /* Space between unordered list items */
+}
+
+/* List item styles */
+li {
+    margin-bottom: 5px; /* Space between list items */
+    line-height: 1.5; /* Line height for better readability */
+}
+
+/* Nested lists */
+ol ol, ol ul, ul ol, ul ul {
+    padding-left: 20px;
+    margin-left: 30px; /* Further indent nested lists */
+    margin-bottom: 0; /* Remove extra margin at the bottom of nested lists */
+}
+
+/* Code blocks */
+pre {
+    background-color: #f4f4f4;
+    padding: 10px;
+    overflow-x: auto;
+    max-width: 100%; /* Ensure it doesn't overflow the page */
+    width: 80%; /* Set a specific width for a container-like appearance */
+    margin: 0 1em; /* No vertical margin; 1em of space on the left and right (this does not center the block) */
+    box-sizing: border-box; /* Include padding in the width */
+    border: 1px solid #ccc; /* Optional: Add a border for better definition */
+    border-radius: 4px; /* Optional: Add rounded corners */
+}
+
+code {
+    font-family: 'Courier New', Courier, monospace;
+    background-color: #f4f4f4;
+    padding: 2px 4px;
+    border-radius: 4px;
+    box-sizing: border-box; /* Include padding in the width */
+}
+
+.message {
+    margin-top: 8px;
+    margin-bottom: 8px;
+}
+
+/* Table Styles */
+table {
+    width: 100%;
+    margin-bottom: 1rem;
+    color: #212529;
+    border-collapse: collapse; /* Removes the space between borders */
+}
+
+th, td {
+    margin: 0;
+    padding: 0.75rem;
+    vertical-align: top;
+    border-top: 1px solid #dee2e6;
+}
+
+thead th {
+    vertical-align: bottom;
+    border-bottom: 2px solid #dee2e6;
+}
+
+tbody + tbody {
+    border-top: 2px solid #dee2e6;
+}
+
+/* markdown-section styles */
+.markdown-section blockquote,
+.markdown-section h1,
+.markdown-section h2,
+.markdown-section h3,
+.markdown-section h4,
+.markdown-section h5,
+.markdown-section h6,
+.markdown-section p,
+.markdown-section pre,
+.markdown-section table,
+.markdown-section ul {
+    /* Give most block elements a top margin */
+    margin-top: 1rem;
+}
+
+/* Remove top margin if it's the first child */
+.markdown-section blockquote:first-child,
+.markdown-section h1:first-child,
+.markdown-section h2:first-child,
+.markdown-section h3:first-child,
+.markdown-section h4:first-child,
+.markdown-section h5:first-child,
+.markdown-section h6:first-child,
+.markdown-section p:first-child,
+.markdown-section pre:first-child,
+.markdown-section table:first-child,
+.markdown-section ul:first-child {
+    margin-top: 0;
+}
+
+
+/* Remove top margin of <ul> following a <p> */
+.markdown-section p + ul {
+    margin-top: 0;
+}
+
+/* Remove the bottom margin of every <p> so that a following <ul> sits directly beneath it */
+/* Note: limiting this to a <p> that is followed by a <ul> would require the :has() selector, which is not used here */
+.markdown-section p {
+    margin-bottom: 0;
+}
+
+/* Remove the top margin of a <ul> that directly follows a <p> (repeats the identical rule above) */
+.markdown-section p + ul {
+    margin-top: 0;
+}
+
+/* List item styles */
+.markdown-section li {
+    padding: 2px;
+}
+
+.markdown-section li p {
+    margin-bottom: 0;
+    padding: 0;
+}
+
+/* Avoid margins for nested lists */
+.markdown-section li > ul {
+    margin-top: 0;
+    margin-bottom: 0;
+}
+
+/* Table styles */
+.markdown-section table {
+    width: 100%;
+    border-collapse: collapse;
+    margin: 1rem 0;
+}
+
+.markdown-section th,
+.markdown-section td {
+    border: 1px solid #ddd;
+    padding: 0.5rem;
+    text-align: left;
+}
+
+.markdown-section th {
+    background-color: #f2f2f2;
+}
+
+.markdown-section pre {
+    padding: 10px;
+    margin: 10px;
+}
+
+.markdown-section pre code {
+    position: relative;
+    color: rgb(172, 0, 95);
+}
+
+
+    

BIN
backend/open_webui/static/fonts/NotoSans-Bold.ttf


BIN
backend/open_webui/static/fonts/NotoSans-Italic.ttf


BIN
backend/open_webui/static/fonts/NotoSans-Regular.ttf


BIN
backend/open_webui/static/fonts/NotoSansJP-Regular.ttf


BIN
backend/open_webui/static/fonts/NotoSansKR-Regular.ttf


BIN
backend/open_webui/static/fonts/NotoSansSC-Regular.ttf


+ 134 - 0
backend/open_webui/utils/pdf_generator.py

@@ -0,0 +1,134 @@
+from datetime import datetime
+from io import BytesIO
+from pathlib import Path
+from typing import Dict, Any, List
+
+from markdown import markdown
+from starlette.responses import Response
+from xhtml2pdf import pisa
+
+from open_webui.apps.webui.models.chats import ChatTitleMessagesForm
+
+
+class PDFGenerator:
+    """
+    Description:
+    The `PDFGenerator` class is designed to create PDF documents from chat messages.
+    The process involves transforming markdown content into HTML and then into a PDF format,
+    which can be easily returned as a response to the routes.
+
+    It depends on xhtml2pdf for converting HTML to PDF (more details at https://github.com/xhtml2pdf/xhtml2pdf).
+    Known limitation: xhtml2pdf has issues rendering HTML list tags; see https://github.com/xhtml2pdf/xhtml2pdf/issues/550
+    and https://github.com/xhtml2pdf/xhtml2pdf/issues/756.
+
+    Attributes:
+    - `form_data`: An instance of `ChatTitleMessagesForm` containing title and messages.
+
+    """
+
+    def __init__(self, form_data: ChatTitleMessagesForm):
+        self.html_body = None
+        self.messages_html = None
+        self.form_data = form_data
+        self.css_style_file = Path("./backend/open_webui/static/assets/pdf-style.css")
+
+    def build_html_message(self, message: Dict[str, Any]) -> str:
+        """Build HTML for a single message."""
+        role = message.get("role", "user")
+        content = message.get("content", "")
+        timestamp = message.get('timestamp')
+
+        model = message.get('model') if role == 'assistant' else ''
+
+        date_str = self.format_timestamp(timestamp) if timestamp else ''
+
+        # Convert the markdown content to HTML using the pymdownx.extra extension.
+        # - https://facelessuser.github.io/pymdown-extensions/usage_notes/
+        html_content = markdown(content, extensions=['pymdownx.extra'])
+
+        html_message = f"""
+              <div class="message">
+                  <small> {date_str} </small>
+                  <div>
+                      <h2>
+                          <strong>{role.title()}</strong>
+                          <small class="text-muted">{model}</small>
+                      </h2>
+                  </div>
+                  <div class="markdown-section">
+                      {html_content}
+                  </div>
+              </div>
+          """
+        return html_message
+
+    def create_pdf_from_html(self) -> bytes:
+        """Convert HTML content to PDF and return the bytes."""
+        pdf_buffer = BytesIO()
+        pisa_status = pisa.CreatePDF(src=self.html_body, dest=pdf_buffer)
+        if pisa_status.err:
+            raise RuntimeError("Error generating PDF")
+
+        return pdf_buffer.getvalue()
+
+    def format_timestamp(self, timestamp: float) -> str:
+        """Convert a UNIX timestamp to a formatted date string."""
+        try:
+            date_time = datetime.fromtimestamp(timestamp)
+            return date_time.strftime("%Y-%m-%d, %H:%M:%S")
+        except (ValueError, TypeError) as e:
+            # Log the error if necessary
+            return ''
+
+    def generate_chat_pdf(self) -> Response:
+        """
+        Generate a PDF from chat messages.
+
+        Returns:
+            A Starlette Response containing the generated PDF, or a 500 response with an error message.
+        """
+        try:
+            # Build HTML messages
+            messages_html_list: List[str] = [self.build_html_message(msg) for msg in self.form_data.messages]
+            self.messages_html = '<div>' + ''.join(messages_html_list) + '</div>'
+
+            # Generate full HTML body
+            self.html_body = self.generate_html_body()
+
+            # Create PDF
+            pdf_bytes = self.create_pdf_from_html()
+
+            # Return PDF as response
+            return Response(
+                content=pdf_bytes,
+                media_type="application/pdf",
+                headers={"Content-Disposition": "attachment;filename=chat.pdf"},
+            )
+        except RuntimeError as pdf_error:
+            # Handle PDF generation errors
+            return Response(content=str(pdf_error), status_code=500)
+        except Exception as e:
+            # Handle other unexpected errors
+            return Response(content="An unexpected error occurred.", status_code=500)
+
+    def generate_html_body(self) -> str:
+        """Generate the full HTML body for the PDF."""
+        return f"""
+        <html>
+            <head>
+                <meta charset="UTF-8">
+                <meta name="viewport" content="width=device-width, initial-scale=1.0">
+                <link rel="stylesheet" href="{self.css_style_file.as_posix()}">
+            </head>
+            <body>
+                <div class="container"> 
+                    <div class="text-center">
+                        <h1>{self.form_data.title}</h1>
+                    </div>
+                    <div>
+                        {self.messages_html}
+                    </div>
+                </div>
+            </body>
+        </html>
+        """

+ 2 - 0
backend/requirements.txt

@@ -51,6 +51,8 @@ einops==0.8.0
 
 
 ftfy==6.2.3
 ftfy==6.2.3
 pypdf==4.3.1
 pypdf==4.3.1
+xhtml2pdf==0.2.16
+pymdown-extensions==10.11.2
 docx2txt==0.8
 docx2txt==0.8
 python-pptx==1.0.0
 python-pptx==1.0.0
 unstructured==0.15.9
 unstructured==0.15.9