function export_to_pdf
Exports a document with text and data sections to a PDF file using ReportLab, handling custom styling, section ordering, and content formatting including Quill Delta to HTML/Markdown conversion.
/tf/active/vicechatdev/vice_ai/new_app.py
3351 - 3502
complex
Purpose
This function generates a professionally formatted PDF document from a structured document object containing text sections (with Quill Delta format content) and optional data sections. It creates a PDF with custom styles for titles, headings, and content, processes sections in their specified order, converts rich text formats, and handles both text and data visualization sections. The function is designed for document export functionality in a content management or document generation system.
Source Code
def _pdf_text(value):
    """Return *value* as text that is safe inside ReportLab Paragraph markup.

    Paragraph interprets its text as XML-like markup, so a literal ``&`` or
    ``<`` coming from user data has to be escaped before it is embedded.
    ``None`` is rendered as an empty string.
    """
    if value is None:
        return ''
    return html.escape(str(value), quote=False)


def export_to_pdf(document, text_sections, data_sections=None) -> bytes:
    """Export document to PDF format.

    Builds the PDF in memory: a title block (title, author, creation date,
    description) followed by the sections referenced from
    ``document.sections``, ordered by their ``position``.

    Args:
        document: Object providing ``title``, ``owner``, ``created_at``,
            ``description`` and ``sections``. Each entry of ``sections`` must
            expose ``section_type``, ``section_id`` and ``position``.
        text_sections: Iterable of text sections, looked up by ``id``. The
            attributes read here are ``section_type.value``, ``title``,
            ``current_content`` and (optionally) ``level``.
        data_sections: Optional iterable of data sections, looked up by
            ``id`` and rendered through ``add_data_section_to_pdf``.

    Returns:
        The bytes of the generated PDF.

    Raises:
        ImportError: If ``PDF_AVAILABLE`` is falsy.
    """
    if not PDF_AVAILABLE:
        raise ImportError("reportlab not available")

    if data_sections is None:
        data_sections = []

    buffer = BytesIO()
    doc = SimpleDocTemplate(buffer, pagesize=A4)
    styles = getSampleStyleSheet()
    story = []

    # Custom styles
    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Title'],
        fontSize=24,
        spaceAfter=30,
        alignment=TA_CENTER,
    )
    heading1_style = ParagraphStyle(
        'CustomHeading1',
        parent=styles['Heading1'],
        fontSize=18,
        spaceAfter=12,
        spaceBefore=20,
    )
    heading2_style = ParagraphStyle(
        'CustomHeading2',
        parent=styles['Heading2'],
        fontSize=16,
        spaceAfter=10,
        spaceBefore=15,
    )
    heading3_style = ParagraphStyle(
        'CustomHeading3',
        parent=styles['Heading3'],
        fontSize=14,
        spaceAfter=8,
        spaceBefore=12,
    )

    # Add custom styles to styles dictionary for helper functions
    styles.add(heading1_style)
    styles.add(heading2_style)
    styles.add(heading3_style)
    heading_styles = {1: heading1_style, 2: heading2_style, 3: heading3_style}

    # Document title. All user-supplied text goes through _pdf_text so that
    # characters such as '&' or '<' are not parsed as Paragraph markup.
    story.append(Paragraph(_pdf_text(document.title), title_style))
    story.append(Spacer(1, 12))

    # Author and metadata
    if document.owner:
        story.append(Paragraph(f"<b>Author:</b> {_pdf_text(document.owner)}", styles['Normal']))
    if document.created_at is not None:
        # Skip the line instead of failing when no creation date is set.
        created_label = document.created_at.strftime('%Y-%m-%d %H:%M')
        story.append(Paragraph(f"<b>Created:</b> {created_label}", styles['Normal']))
    if document.description:
        story.append(Paragraph(f"<b>Description:</b> {_pdf_text(document.description)}", styles['Normal']))
    story.append(Spacer(1, 20))

    # Lookup dicts so each document section resolves to its content object.
    text_sections_dict = {section.id: section for section in text_sections}
    data_sections_dict = {section.id: section for section in data_sections}

    # Add sections in document order (sorted by position)
    for doc_section in sorted(document.sections, key=lambda ds: ds.position):
        section = None
        is_data_section = False
        if doc_section.section_type == SectionType.TEXT:
            section = text_sections_dict.get(doc_section.section_id)
        elif doc_section.section_type == SectionType.DATA:
            section = data_sections_dict.get(doc_section.section_id)
            is_data_section = True

        # Unknown section type or dangling reference: nothing to render.
        if not section:
            continue

        # Process data sections
        if is_data_section:
            add_data_section_to_pdf(story, section, styles, heading2_style, heading3_style)
            story.append(Spacer(1, 20))
            continue

        # Process text sections
        section_kind = section.section_type.value
        if section_kind == 'header':
            # Clamp the level to 1..3 (the available heading styles); a
            # missing or None level is treated as level 1.
            raw_level = getattr(section, 'level', 1) or 1
            level = max(1, min(raw_level, 3))
            heading_style = heading_styles.get(level, heading3_style)
            story.append(Paragraph(_pdf_text(section.title), heading_style))
        elif section_kind in ('text', 'content'):
            if section.title:
                story.append(Paragraph(_pdf_text(section.title), heading3_style))

            if section.current_content:
                content_to_process = section.current_content
                logger.info(
                    "PDF: Raw content type: %s, first 100 chars: %s",
                    type(content_to_process),
                    str(content_to_process)[:100],
                )

                # Convert Quill Delta to HTML first
                html_content = convert_quill_delta_to_html(content_to_process)
                logger.info("PDF: Converted to HTML: %s...", html_content[:100])

                # Then convert HTML to Markdown for processing
                markdown_content = html_to_markdown(html_content)
                logger.info("PDF: Converted to Markdown: %s...", markdown_content[:100])

                # Remember where this section's content starts so a failed
                # formatting pass can be rolled back before the fallback runs.
                checkpoint = len(story)
                try:
                    # Process markdown content for proper formatting
                    elements = process_markdown_content(markdown_content)
                    add_formatted_content_to_pdf(story, elements, styles)
                except Exception as e:
                    logger.warning(f"Error processing content for section {section.id}: {e}")
                    # Drop anything the failed pass already appended; the
                    # fallback below renders the whole content again.
                    del story[checkpoint:]

                    # Fallback to simple paragraph splitting with basic
                    # formatting. The content is rendered exactly once, from
                    # the converted HTML, and escaped for reportlab.
                    clean_content = clean_html_tags(html_content)
                    for para_text in clean_content.split('\n\n'):
                        para_text = para_text.strip()
                        if para_text:
                            story.append(Paragraph(html.escape(para_text), styles['Normal']))
                            story.append(Spacer(1, 6))

    # Build PDF
    doc.build(story)
    buffer.seek(0)
    return buffer.getvalue()
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| document | - | - | positional_or_keyword |
| text_sections | - | - | positional_or_keyword |
| data_sections | - | None | positional_or_keyword |
Parameter Details
document: A Document model object containing document metadata (title, owner, created_at, description) and a 'sections' attribute with ordered DocumentSection objects. Each DocumentSection has section_type (TEXT or DATA), section_id, and position attributes for ordering.
text_sections: A list or iterable of TextSection model objects containing text content. Each section should have attributes: id, section_type (with value 'header', 'text', or 'content'), title, current_content (in Quill Delta format), and optionally a 'level' attribute for headers (1-3).
data_sections: Optional list of DataSection model objects containing data visualizations or tables. Defaults to empty list if None. Each section is processed by the add_data_section_to_pdf helper function.
Return Value
Returns a bytes object containing the complete PDF file data. This can be written to a file, sent as an HTTP response, or stored in memory. If the PDF_AVAILABLE flag is False, the function does not return a value; it raises ImportError instead.
Dependencies
reportlab, io, html, logging
Required Imports
from io import BytesIO
from reportlab.lib.pagesizes import A4
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.enums import TA_CENTER
import html
Conditional/Optional Imports
These imports are only needed under specific conditions:
from models import Document, TextSection, DataSection, DocumentSection, SectionType
Condition: Required for type checking and accessing model objects passed as parameters
Required (conditional)
from reportlab import *
Condition: Only if PDF_AVAILABLE flag is True, otherwise raises ImportError
Required (conditional)
import logging as logger
Condition: Required for logging content conversion steps and errors
Required (conditional)
Usage Example
# Usage example: build a document with two text sections and one data
# section, export it to PDF bytes, and write those bytes to disk.
from io import BytesIO
from models import Document, TextSection, DataSection
from datetime import datetime
# Assume document, text_sections, and data_sections are already created
# NOTE: export_to_pdf only renders sections that are referenced from
# document.sections (matched by section_id and ordered by position). With an
# empty `sections` list, as below, only the title and metadata are rendered.
document = Document(
    title='Annual Report 2024',
    owner='John Doe',
    created_at=datetime.now(),
    description='Company annual report',
    sections=[]
)
# NOTE(review): export_to_pdf reads `section.section_type.value`, so
# section_type presumably has to be an enum-like object rather than a plain
# string - confirm against the TextSection model.
text_sections = [
    TextSection(
        id=1,
        section_type='header',
        title='Executive Summary',
        level=1,
        current_content='{"ops":[{"insert":"Summary text"}]}'
    ),
    TextSection(
        id=2,
        section_type='text',
        title='Introduction',
        current_content='{"ops":[{"insert":"Introduction content"}]}'
    )
]
data_sections = [
    DataSection(id=3, title='Sales Chart', data={})
]
# Export to PDF
try:
    pdf_bytes = export_to_pdf(document, text_sections, data_sections)
    # Save to file
    with open('output.pdf', 'wb') as f:
        f.write(pdf_bytes)
    # Or send as HTTP response
    # return send_file(BytesIO(pdf_bytes), mimetype='application/pdf', as_attachment=True, download_name='report.pdf')
except ImportError as e:
    # export_to_pdf raises ImportError when PDF_AVAILABLE is falsy.
    print(f'ReportLab not available: {e}')
Best Practices
- Ensure PDF_AVAILABLE flag is checked before calling this function to avoid ImportError
- Provide document objects with properly ordered sections (via position attribute) for correct PDF layout
- TextSection current_content should be in valid Quill Delta JSON format for proper conversion
- Handle the returned bytes appropriately - write to file, stream to response, or store in memory
- Implement error handling around the function call as content processing can raise exceptions
- The function logs extensively - ensure logger is configured to capture info and warning levels for debugging
- Helper functions (convert_quill_delta_to_html, html_to_markdown, etc.) must be available in scope
- For large documents, consider memory usage as the entire PDF is built in memory before returning
- The function has fallback logic for content processing errors - review logs if output is not as expected
- Custom styles are added to the styles dictionary - ensure no naming conflicts with existing styles
- Data sections require the add_data_section_to_pdf helper function to be properly implemented
- Section types must match expected values ('header', 'text', 'content' for text sections)
- Header levels are clamped to 1-3 to match available heading styles
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
-
function export_to_pdf_v1 85.3% similar
-
function export_to_docx 75.9% similar
-
function add_formatted_content_to_pdf 70.8% similar
-
function add_formatted_content_to_pdf_v1 70.2% similar
-
function export_to_pdf_v1 67.4% similar