revert: pdf gen

This commit is contained in:
Timothy J. Baek
2024-10-13 00:05:28 -07:00
parent 5dc05eac67
commit 112cbdccbb
14 changed files with 249 additions and 163 deletions

View File

@ -56,8 +56,17 @@ class ChatForm(BaseModel):
async def download_chat_as_pdf( async def download_chat_as_pdf(
form_data: ChatTitleMessagesForm, form_data: ChatTitleMessagesForm,
): ):
response = PDFGenerator(form_data).generate_chat_pdf() try:
return response pdf_bytes = PDFGenerator(form_data).generate_chat_pdf()
return Response(
content=pdf_bytes,
media_type="application/pdf",
headers={"Content-Disposition": "attachment;filename=chat.pdf"},
)
except Exception as e:
print(e)
raise HTTPException(status_code=400, detail=str(e))
@router.get("/db/download") @router.get("/db/download")

View File

@ -230,6 +230,8 @@ if FROM_INIT_PY:
DATA_DIR = Path(os.getenv("DATA_DIR", OPEN_WEBUI_DIR / "data")) DATA_DIR = Path(os.getenv("DATA_DIR", OPEN_WEBUI_DIR / "data"))
STATIC_DIR = Path(os.getenv("STATIC_DIR", OPEN_WEBUI_DIR / "static"))
FONTS_DIR = Path(os.getenv("FONTS_DIR", OPEN_WEBUI_DIR / "static" / "fonts")) FONTS_DIR = Path(os.getenv("FONTS_DIR", OPEN_WEBUI_DIR / "static" / "fonts"))
FRONTEND_BUILD_DIR = Path(os.getenv("FRONTEND_BUILD_DIR", BASE_DIR / "build")).resolve() FRONTEND_BUILD_DIR = Path(os.getenv("FRONTEND_BUILD_DIR", BASE_DIR / "build")).resolve()

View File

@ -1,24 +1,57 @@
/* HTML and Body */ /* HTML and Body */
@font-face {
font-family: 'NotoSans';
src: url('fonts/NotoSans-Variable.ttf');
}
@font-face {
font-family: 'NotoSansJP';
src: url('fonts/NotoSansJP-Variable.ttf');
}
@font-face {
font-family: 'NotoSansKR';
src: url('fonts/NotoSansKR-Variable.ttf');
}
@font-face {
font-family: 'NotoSansSC';
src: url('fonts/NotoSansSC-Variable.ttf');
}
@font-face {
font-family: 'NotoSansSC-Regular';
src: url('fonts/NotoSansSC-Regular.ttf');
}
html { html {
box-sizing: border-box; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'NotoSans', 'NotoSansJP', 'NotoSansKR',
'NotoSansSC', 'STSong-Light', 'MSung-Light', 'HeiseiMin-W3', 'HYSMyeongJo-Medium', Roboto,
'Helvetica Neue', Arial, sans-serif;
font-size: 14px; /* Default font size */ font-size: 14px; /* Default font size */
line-height: 1.5; line-height: 1.5;
} }
*, *::before, *::after { *,
*::before,
*::after {
box-sizing: inherit; box-sizing: inherit;
} }
body { body {
margin: 0; margin: 0;
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
color: #212529; color: #212529;
background-color: #fff; background-color: #fff;
width: auto; width: auto;
} }
/* Typography */ /* Typography */
h1, h2, h3, h4, h5, h6 { h1,
h2,
h3,
h4,
h5,
h6 {
font-weight: 500; font-weight: 500;
margin: 0; margin: 0;
} }
@ -98,7 +131,9 @@ a:hover {
} }
/* General styles for lists */ /* General styles for lists */
ol, ul, li { ol,
ul,
li {
padding-left: 40px; /* Increase padding to move bullet points to the right */ padding-left: 40px; /* Increase padding to move bullet points to the right */
margin-left: 20px; /* Indent lists from the left */ margin-left: 20px; /* Indent lists from the left */
} }
@ -109,7 +144,6 @@ ol {
margin-bottom: 10px; /* Space after each list */ margin-bottom: 10px; /* Space after each list */
} }
ol li { ol li {
margin-bottom: 0.5rem; /* Space between ordered list items */ margin-bottom: 0.5rem; /* Space between ordered list items */
} }
@ -131,7 +165,10 @@ li {
} }
/* Nested lists */ /* Nested lists */
ol ol, ol ul, ul ol, ul ul { ol ol,
ol ul,
ul ol,
ul ul {
padding-left: 20px; padding-left: 20px;
margin-left: 30px; /* Further indent nested lists */ margin-left: 30px; /* Further indent nested lists */
margin-bottom: 0; /* Remove extra margin at the bottom of nested lists */ margin-bottom: 0; /* Remove extra margin at the bottom of nested lists */
@ -161,6 +198,8 @@ code {
.message { .message {
margin-top: 8px; margin-top: 8px;
margin-bottom: 8px; margin-bottom: 8px;
max-width: 100%;
overflow-wrap: break-word;
} }
/* Table Styles */ /* Table Styles */
@ -171,7 +210,8 @@ table {
border-collapse: collapse; /* Removes the space between borders */ border-collapse: collapse; /* Removes the space between borders */
} }
th, td { th,
td {
margin: 0; margin: 0;
padding: 0.75rem; padding: 0.75rem;
vertical-align: top; vertical-align: top;
@ -218,7 +258,6 @@ tbody + tbody {
margin-top: 0; margin-top: 0;
} }
/* Remove top margin of <ul> following a <p> */ /* Remove top margin of <ul> following a <p> */
.markdown-section p + ul { .markdown-section p + ul {
margin-top: 0; margin-top: 0;
@ -278,6 +317,3 @@ tbody + tbody {
position: relative; position: relative;
color: rgb(172, 0, 95); color: rgb(172, 0, 95);
} }

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -4,9 +4,12 @@ from pathlib import Path
from typing import Dict, Any, List from typing import Dict, Any, List
from markdown import markdown from markdown import markdown
from starlette.responses import Response
from xhtml2pdf import pisa from xhtml2pdf import pisa
import site
from fpdf import FPDF
from open_webui.env import STATIC_DIR, FONTS_DIR
from open_webui.apps.webui.models.chats import ChatTitleMessagesForm from open_webui.apps.webui.models.chats import ChatTitleMessagesForm
@ -30,21 +33,31 @@ class PDFGenerator:
self.html_body = None self.html_body = None
self.messages_html = None self.messages_html = None
self.form_data = form_data self.form_data = form_data
self.css_style_file = Path("./backend/open_webui/static/assets/pdf-style.css")
def build_html_message(self, message: Dict[str, Any]) -> str: self.css = Path(STATIC_DIR / "assets" / "pdf-style.css").read_text()
def format_timestamp(self, timestamp: float) -> str:
    """Convert a UNIX timestamp to a formatted date string.

    Args:
        timestamp: Seconds since the epoch.

    Returns:
        The timestamp rendered as ``YYYY-MM-DD, HH:MM:SS`` (naive local
        time, as produced by ``datetime.fromtimestamp``), or an empty
        string when the value cannot be converted.
    """
    try:
        return datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d, %H:%M:%S")
    except (ValueError, TypeError, OverflowError, OSError):
        # fromtimestamp() signals out-of-range values with OverflowError or
        # OSError as well as ValueError; treat all of them as "no date" so
        # a single bad timestamp cannot abort the whole export.
        return ""
def _build_html_message(self, message: Dict[str, Any]) -> str:
"""Build HTML for a single message.""" """Build HTML for a single message."""
role = message.get("role", "user") role = message.get("role", "user")
content = message.get("content", "") content = message.get("content", "")
timestamp = message.get('timestamp') timestamp = message.get("timestamp")
model = message.get('model') if role == 'assistant' else '' model = message.get("model") if role == "assistant" else ""
date_str = self.format_timestamp(timestamp) if timestamp else '' date_str = self.format_timestamp(timestamp) if timestamp else ""
# extends pymdownx extension to convert markdown to html. # extends pymdownx extension to convert markdown to html.
# - https://facelessuser.github.io/pymdown-extensions/usage_notes/ # - https://facelessuser.github.io/pymdown-extensions/usage_notes/
html_content = markdown(content, extensions=['pymdownx.extra']) html_content = markdown(content, extensions=["pymdownx.extra"])
html_message = f""" html_message = f"""
<div class="message"> <div class="message">
@ -62,63 +75,35 @@ class PDFGenerator:
""" """
return html_message return html_message
def create_pdf_from_html(self) -> bytes: def _fetch_resources(self, uri: str, rel: str) -> str:
print(str(STATIC_DIR / uri))
return str(STATIC_DIR / uri)
def _create_pdf_from_html(self) -> bytes:
"""Convert HTML content to PDF and return the bytes.""" """Convert HTML content to PDF and return the bytes."""
pdf_buffer = BytesIO() pdf_buffer = BytesIO()
pisa_status = pisa.CreatePDF(src=self.html_body, dest=pdf_buffer) pisa_status = pisa.CreatePDF(
src=self.html_body.encode("UTF-8"),
dest=pdf_buffer,
encoding="UTF-8",
link_callback=self._fetch_resources,
)
if pisa_status.err: if pisa_status.err:
raise RuntimeError("Error generating PDF") raise RuntimeError("Error generating PDF")
return pdf_buffer.getvalue() return pdf_buffer.getvalue()
def format_timestamp(self, timestamp: float) -> str: def _generate_html_body(self) -> str:
"""Convert a UNIX timestamp to a formatted date string."""
try:
date_time = datetime.fromtimestamp(timestamp)
return date_time.strftime("%Y-%m-%d, %H:%M:%S")
except (ValueError, TypeError) as e:
# Log the error if necessary
return ''
def generate_chat_pdf(self) -> Response:
    """Render the chat held in ``self.form_data`` as a PDF download.

    Returns:
        A Response carrying the PDF bytes as an ``application/pdf``
        attachment named ``chat.pdf``. On failure a status-500 Response is
        returned instead: its body is the error text for a RuntimeError,
        or a generic message for any other exception.
    """
    try:
        # Render each message and wrap the concatenation in one container.
        rendered = "".join(
            self.build_html_message(entry) for entry in self.form_data.messages
        )
        self.messages_html = "<div>" + rendered + "</div>"

        # The full document must be assembled before the PDF conversion.
        self.html_body = self.generate_html_body()

        return Response(
            content=self.create_pdf_from_html(),
            media_type="application/pdf",
            headers={"Content-Disposition": "attachment;filename=chat.pdf"},
        )
    except RuntimeError as pdf_error:
        # PDF generation errors: surface the error text to the client.
        return Response(content=str(pdf_error), status_code=500)
    except Exception:
        # Anything else is reported with a generic message.
        return Response(content="An unexpected error occurred.", status_code=500)
def generate_html_body(self) -> str:
"""Generate the full HTML body for the PDF.""" """Generate the full HTML body for the PDF."""
return f""" return f"""
<html> <html>
<head> <head>
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="stylesheet" href="{self.css_style_file.as_posix()}"> <style type="text/css">
{self.css}
</style>
</head> </head>
<body> <body>
<div class="container"> <div class="container">
@ -132,3 +117,57 @@ class PDFGenerator:
</body> </body>
</html> </html>
""" """
def generate_chat_pdf(self) -> bytes:
    """Generate a PDF from chat messages.

    Each message in ``self.form_data.messages`` is written to an FPDF
    document as its upper-cased role in bold, followed by the raw message
    content in the regular face.

    Returns:
        The rendered PDF document as bytes.

    Raises:
        Exception: Errors raised while loading fonts or rendering are not
            handled here and propagate to the caller unchanged.
    """
    # The resolved fallback is stored back on the module-level name, so the
    # directory probing below only has to succeed once per process.
    global FONTS_DIR

    pdf = FPDF()
    pdf.add_page()

    # When installed with `pip install`, the static directory is in site-packages.
    if not FONTS_DIR.exists():
        FONTS_DIR = Path(site.getsitepackages()[0]) / "static/fonts"
    # When installed with `pip install -e .`, the fonts are presumably only in
    # the source tree (TODO confirm). This relative path only works if
    # `open-webui serve` is run from the root of this project.
    if not FONTS_DIR.exists():
        FONTS_DIR = Path("./backend/static/fonts")

    pdf.add_font("NotoSans", "", f"{FONTS_DIR}/NotoSans-Regular.ttf")
    pdf.add_font("NotoSans", "b", f"{FONTS_DIR}/NotoSans-Bold.ttf")
    pdf.add_font("NotoSans", "i", f"{FONTS_DIR}/NotoSans-Italic.ttf")
    pdf.add_font("NotoSansKR", "", f"{FONTS_DIR}/NotoSansKR-Regular.ttf")
    pdf.add_font("NotoSansJP", "", f"{FONTS_DIR}/NotoSansJP-Regular.ttf")
    pdf.add_font("NotoSansSC", "", f"{FONTS_DIR}/NotoSansSC-Regular.ttf")

    pdf.set_font("NotoSans", size=12)
    # Glyphs missing from NotoSans are looked up in the CJK faces.
    pdf.set_fallback_fonts(["NotoSansKR", "NotoSansJP", "NotoSansSC"])

    pdf.set_auto_page_break(auto=True, margin=15)

    # Adjust the effective page width for multi_cell; the additional 10 is
    # subtracted for extra padding.
    effective_page_width = pdf.w - 2 * pdf.l_margin - 10

    # Add chat messages
    for message in self.form_data.messages:
        # Same defaults as _build_html_message, so a message without a
        # role or content does not abort the export with a KeyError.
        role = message.get("role", "user")
        content = message.get("content", "")

        pdf.set_font("NotoSans", "B", size=14)  # Bold for the role
        # Trailing positional arguments: border=0, align="L".
        pdf.multi_cell(effective_page_width, 10, role.upper(), 0, "L")
        pdf.ln(1)  # Small gap between the role heading and the content

        pdf.set_font("NotoSans", size=10)  # Regular for content
        pdf.multi_cell(effective_page_width, 6, content, 0, "L")
        pdf.ln(1.5)  # Extra space between messages

    # output() with no destination returns the document in memory;
    # normalize it to immutable bytes for the caller.
    return bytes(pdf.output())