function msg_to_pdf
Converts a Microsoft Outlook .msg email file to a single PDF document, including the email body and all attachments merged together.
/tf/active/vicechatdev/msg_to_eml.py
874 - 992
complex
Purpose
This function provides comprehensive email-to-PDF conversion functionality. It extracts the email body from a .msg file, converts it to HTML and then PDF, processes all attachments (converting them to PDF when possible), and merges everything into a single consolidated PDF document. This is useful for archiving emails, creating printable records, or integrating email content into document management systems.
Source Code
def msg_to_pdf(msg_path, pdf_path):
    """Convert a .msg file to PDF format with all attachments included.

    The email body is rendered to HTML and converted to PDF. Each attachment
    is then converted to PDF where possible, and all resulting PDFs are
    merged (body first) into a single document written to ``pdf_path``.

    Args:
        msg_path: Path to the input Outlook .msg file.
        pdf_path: Destination path for the resulting PDF.

    Returns:
        True if the PDF was written to ``pdf_path``. False if the input file
        is missing, the email body could not be converted, merging failed,
        or an unexpected error occurred. Attachments that are empty or cannot
        be converted are logged and left out without failing the conversion.
    """
    msg = None
    try:
        # Check if input file exists
        if not os.path.exists(msg_path):
            logger.error(f"Input file not found: {msg_path}")
            return False

        # Create a temporary directory for processing
        with tempfile.TemporaryDirectory() as temp_dir:
            # Load the MSG file
            msg = extract_msg.Message(msg_path)

            # Generate HTML from the message for the email body PDF
            html_content = generate_html_from_msg(msg)
            html_path = os.path.join(temp_dir, "email.html")
            with open(html_path, 'w', encoding='utf-8') as f:
                f.write(html_content)

            # Convert the HTML to PDF for the email body
            pdf_converter = PDFConverter(temp_dir)
            email_body_pdf = os.path.join(temp_dir, "email_body.pdf")
            result = pdf_converter.convert_to_pdf(html_path, email_body_pdf)
            if not result or not os.path.exists(email_body_pdf):
                logger.error(f"Failed to convert email body HTML to PDF for {msg_path}")
                return False

            # List to collect all PDFs for merging (starting with the email body PDF)
            pdf_files_to_merge = [email_body_pdf]

            # Process attachments (if any)
            if msg.attachments:
                logger.info(f"Processing {len(msg.attachments)} attachments")
                for i, attachment in enumerate(msg.attachments):
                    filename = None
                    try:
                        # Get filename
                        if hasattr(attachment, 'longFilename') and attachment.longFilename:
                            filename = attachment.longFilename
                        elif hasattr(attachment, 'shortFilename') and attachment.shortFilename:
                            filename = attachment.shortFilename
                        else:
                            filename = f'attachment_{i+1}'
                        logger.info(f"Processing attachment: {filename}")

                        # Skip if no data
                        if not attachment.data:
                            logger.warning(f"Skipping empty attachment: {filename}")
                            continue

                        # The filename comes from the email and is untrusted:
                        # keep only its final path component so it cannot
                        # escape the temporary directory.
                        safe_name = os.path.basename(
                            filename.replace("\\", "/").replace("\x00", "")
                        ).strip()
                        if safe_name in ("", ".", ".."):
                            safe_name = f'attachment_{i+1}'

                        # Store the raw attachment in its own subdirectory so
                        # its name can never collide with email.html,
                        # email_body.pdf or the attachment_N.pdf outputs.
                        attachment_dir = os.path.join(temp_dir, f"attachment_{i+1}_src")
                        os.makedirs(attachment_dir, exist_ok=True)
                        attachment_path = os.path.join(attachment_dir, safe_name)
                        with open(attachment_path, 'wb') as f:
                            f.write(attachment.data)

                        # Skip conversion for very small files (likely empty)
                        if os.path.getsize(attachment_path) < 10:
                            logger.warning(f"Skipping tiny attachment: {filename}")
                            continue

                        # Target path for this attachment's PDF
                        attachment_pdf = os.path.join(temp_dir, f"attachment_{i+1}.pdf")

                        # If already PDF, just use it as is
                        if safe_name.lower().endswith('.pdf'):
                            shutil.copy2(attachment_path, attachment_pdf)
                            pdf_files_to_merge.append(attachment_pdf)
                            continue

                        # Try to convert to PDF
                        conversion_result = pdf_converter.convert_to_pdf(attachment_path, attachment_pdf)
                        if conversion_result and os.path.exists(attachment_pdf):
                            pdf_files_to_merge.append(attachment_pdf)
                            logger.info(f"Successfully converted attachment: {filename}")
                        else:
                            logger.warning(f"Could not convert attachment to PDF: {filename}")
                            # For images that failed normal conversion, try direct image-to-pdf
                            extension = os.path.splitext(safe_name.lower())[1]
                            if extension in pdf_converter.IMAGE_EXTENSIONS:
                                try:
                                    pdf_converter._convert_image_to_pdf(attachment_path, attachment_pdf)
                                    if os.path.exists(attachment_pdf):
                                        pdf_files_to_merge.append(attachment_pdf)
                                        logger.info(f"Converted image using direct method: {filename}")
                                except Exception as e:
                                    logger.error(f"Failed direct image conversion: {str(e)}")
                    except Exception as e:
                        # One bad attachment must not abort the whole conversion
                        logger.error(f"Error processing attachment {filename}: {str(e)}")

            # Merge all PDFs if we have multiple
            if len(pdf_files_to_merge) > 1:
                logger.info(f"Merging {len(pdf_files_to_merge)} PDFs")
                merge_result = merge_pdfs(pdf_files_to_merge, pdf_path)
                if merge_result and os.path.exists(pdf_path):
                    logger.info(f"Successfully merged PDFs to: {pdf_path}")
                    return True
                logger.error("Failed to merge PDFs")
                return False

            # Only the email body was converted (the list always holds it),
            # so just copy that single PDF to the destination.
            shutil.copy2(pdf_files_to_merge[0], pdf_path)
            logger.info(f"Created PDF without attachments: {pdf_path}")
            return True
    except Exception as e:
        logger.error(f"Error converting {msg_path} to PDF: {str(e)}")
        logger.error(traceback.format_exc())
        return False
    finally:
        # Release the handle held on the .msg file; a failure to close must
        # not replace the result already being returned.
        if msg is not None:
            try:
                msg.close()
            except Exception as e:
                logger.warning(f"Could not close MSG file {msg_path}: {str(e)}")
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| msg_path | - | - | positional_or_keyword |
| pdf_path | - | - | positional_or_keyword |
Parameter Details
msg_path: String path to the input .msg file (Microsoft Outlook message format). Must be a valid file path pointing to an existing .msg file. The function will check for file existence and return False if not found.
pdf_path: String path where the output PDF file should be saved. This is the destination path for the final merged PDF containing the email body and all converted attachments. The directory must be writable.
Return Value
Returns a boolean value: True if the conversion was successful and the PDF was created at the specified path, False if any error occurred during the process (file not found, conversion failure, merge failure, etc.). The function logs detailed error messages for debugging.
Dependencies
extract_msg, os, tempfile, shutil, traceback, logging, PyPDF2
Required Imports
import extract_msg
import os
import tempfile
import shutil
import traceback
import logging
Usage Example
import logging
import os

# msg_to_pdf is defined in msg_to_eml.py, so import it from that module
from msg_to_eml import msg_to_pdf

# Setup logging so the function's info/error messages are visible
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Convert a .msg file to PDF
msg_file = '/path/to/email.msg'
output_pdf = '/path/to/output.pdf'
success = msg_to_pdf(msg_file, output_pdf)

# The function reports failure through its boolean return value
if success:
    print(f'Successfully converted {msg_file} to {output_pdf}')
    print(f'File size: {os.path.getsize(output_pdf)} bytes')
else:
    print('Conversion failed. Check logs for details.')
Best Practices
- Ensure the logger is properly configured before calling this function to capture detailed error messages
- Verify that the input .msg file is not corrupted and is a valid Microsoft Outlook message format
- Ensure sufficient disk space is available as the function creates temporary files during processing
- The function automatically cleans up temporary files using tempfile.TemporaryDirectory context manager
- Handle the boolean return value to determine if conversion was successful before attempting to use the output PDF
- Be aware that not all attachment types can be converted to PDF - the function will skip unconvertible attachments with warnings
- For production use, implement proper error handling around this function call
- The function requires the PDFConverter class, generate_html_from_msg(), and merge_pdfs() helper functions to be available in scope
- Very small attachments (< 10 bytes) are automatically skipped as they are likely empty or corrupted
- Image attachments have a fallback conversion method if the primary conversion fails
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
- function msg_to_pdf_improved 85.7% similar
- function msg_to_eml 82.0% similar
- function msg_to_eml_alternative 76.1% similar
- function eml_to_pdf 69.0% similar
- function generate_html_from_msg 66.5% similar