function convert_document_to_pdf
Converts a document version to PDF format with audit trail, signatures, watermarks, and PDF/A compliance options, then uploads the result to FileCloud storage.
/tf/active/vicechatdev/CDocs/controllers/document_controller.py
1546 - 1732
complex
Purpose
This function provides comprehensive document-to-PDF conversion capabilities for controlled documents in a document management system. It retrieves a document version from FileCloud, processes it through a document processor that can add audit pages, signature images, watermarks, and convert to PDF/A archival format, then uploads the resulting PDF back to FileCloud. The function handles version management, permission checking, audit logging, and cleanup of temporary files. It's designed for regulated environments requiring document traceability and compliance.
Source Code
def convert_document_to_pdf(
    user: DocUser,
    document_uid: str,
    version_uid: Optional[str] = None,
    include_signatures: bool = True,
    convert_to_pdfa: bool = True,
    add_watermark: bool = False
) -> Dict[str, Any]:
    """
    Convert a document version to PDF using full document processor capabilities

    The editable file of the selected version is downloaded from FileCloud
    into a temporary directory, processed by ``DocumentProcessor`` together
    with a JSON file of audit data, and the resulting PDF is uploaded back
    to FileCloud next to the editable file (same path, ``.pdf`` extension).
    If the version already has a ``pdf_file_path`` nothing is converted and
    the existing path is returned.

    Parameters
    ----------
    user : DocUser
        User performing the conversion
    document_uid : str
        ID of the document
    version_uid : str, optional
        ID of a specific version (default is current version)
    include_signatures : bool
        Whether to include signature images in the audit page
    convert_to_pdfa : bool
        Whether to convert to PDF/A format for archiving
    add_watermark : bool
        Whether to add a watermark to the document. The image at
        ``settings.LOGO_PATH`` is used when it is configured and exists;
        otherwise the document is processed without a watermark.

    Returns
    -------
    Dict[str, Any]
        Dictionary with conversion results. Always contains ``success``,
        ``message``, ``document_uid``, ``version_uid`` and ``pdf_path``.
        ``version_number`` is only present when a conversion was actually
        performed (not when an existing PDF is returned).

    Raises
    ------
    ResourceNotFoundError
        If the document does not exist, the requested version does not
        exist or belongs to another document, or the document has no
        current version.
    BusinessRuleError
        If the version has no editable file, or if any step of the
        download / conversion / upload fails.
    """
    try:
        # Get document instance
        document = ControlledDocument(uid=document_uid)
        if not document.uid:
            raise ResourceNotFoundError(f"Document not found: {document_uid}")

        # Get version: an explicit version must belong to this document,
        # otherwise fall back to the document's current version.
        if version_uid:
            version = DocumentVersion(uid=version_uid)
            if not version or version.document_uid != document_uid:
                raise ResourceNotFoundError(f"Version not found: {version_uid}")
        else:
            version = document.current_version
            if not version:
                raise ResourceNotFoundError(f"No versions found for document: {document_uid}")

        # Check if the version has an editable file
        if not version.word_file_path:
            raise BusinessRuleError("Version has no editable document to convert")

        # Check if PDF already exists; if so, do not reprocess
        if version.pdf_file_path:
            return {
                'success': True,
                'message': 'PDF version already exists',
                'document_uid': document_uid,
                'version_uid': version.uid,
                'pdf_path': version.pdf_file_path
            }

        # Create a temporary directory for processing
        temp_dir = tempfile.mkdtemp()

        try:
            # Get file paths using the centralized function
            from CDocs.controllers.filecloud_controller import get_filecloud_document_path

            # Get the editable file path (version is guaranteed to be set here)
            editable_file_path = get_filecloud_document_path(
                document=document,
                version=version.version_number
            )

            # The PDF lives next to the editable file, differing only by extension
            pdf_file_path = os.path.splitext(editable_file_path)[0] + ".pdf"

            # Initialize FileCloud client
            filecloud_client = get_filecloud_client()

            # Download file content; anything other than bytes is treated as a failure
            file_content = filecloud_client.download_file(editable_file_path)
            if not isinstance(file_content, bytes):
                raise BusinessRuleError("Failed to download editable document")

            # Save to temp file, keeping the original extension
            file_ext = os.path.splitext(editable_file_path)[1]
            temp_file_path = os.path.join(temp_dir, f"document{file_ext}")
            with open(temp_file_path, 'wb') as f:
                f.write(file_content)

            # Create JSON file with audit data
            audit_data = prepare_audit_data_for_document_processor(document, version, user)
            json_file_path = os.path.join(temp_dir, "audit_data.json")
            with open(json_file_path, 'w', encoding='utf-8') as f:
                # default=str stringifies any value json cannot serialize natively
                json.dump(audit_data, f, default=str)

            # Set up output PDF path
            output_pdf_path = os.path.join(temp_dir, "document.pdf")

            # Import the document processor with full capabilities
            from document_auditor.src.document_processor import DocumentProcessor

            # Initialize document processor
            processor = DocumentProcessor()

            # Set watermark image path if needed
            watermark_path = None
            if add_watermark:
                # Use system logo as watermark if available. A missing or
                # empty LOGO_PATH setting means "no watermark" rather than
                # a failed conversion.
                logo_path = getattr(settings, 'LOGO_PATH', None)
                if logo_path and os.path.exists(logo_path):
                    watermark_path = logo_path

            # Process the document with all features
            processor.process_document(
                original_doc_path=temp_file_path,
                json_path=json_file_path,
                output_path=output_pdf_path,
                watermark_image=watermark_path,
                include_signatures=include_signatures,
                convert_to_pdfa=convert_to_pdfa,
                compliance_level='2b',
                finalize=True
            )

            # Upload PDF to FileCloud
            with open(output_pdf_path, 'rb') as pdf_file:
                upload_result = upload_document_to_filecloud(
                    user=user,
                    document=document,
                    file_content=pdf_file.read(),
                    file_path=pdf_file_path,
                    metadata=None
                )

            if not upload_result.get('success', False):
                raise BusinessRuleError(f"Failed to upload PDF to FileCloud: {upload_result.get('message', 'Unknown error')}")

            # Update document version with PDF path
            version.pdf_file_path = pdf_file_path

            # Log conversion event
            audit_trail.log_document_lifecycle_event(
                event_type="DOCUMENT_CONVERTED_TO_PDF",
                user=user,
                document_uid=document_uid,
                details={
                    'version_uid': version.uid,
                    'version_number': version.version_number,
                    'pdf_path': pdf_file_path,
                    'includes_signatures': include_signatures,
                    'is_pdfa': convert_to_pdfa,
                    'has_watermark': add_watermark
                }
            )

            return {
                'success': True,
                'message': 'Document successfully converted to PDF with full audit trail and security features',
                'document_uid': document_uid,
                'version_uid': version.uid,
                'version_number': version.version_number,
                'pdf_path': pdf_file_path
            }

        except Exception as e:
            # Any failure in the processing stage is reported as a
            # BusinessRuleError; the original exception is kept as the cause.
            logger.error(f"Error in document conversion process: {str(e)}")
            raise BusinessRuleError(f"Failed to convert document to PDF: {str(e)}") from e
        finally:
            # Clean up temporary directory (best effort: a cleanup failure
            # must not mask the conversion result or error)
            try:
                if os.path.exists(temp_dir):
                    shutil.rmtree(temp_dir)
            except Exception:
                logger.warning(f"Failed to remove temporary directory: {temp_dir}")

    except (ResourceNotFoundError, ValidationError, PermissionError, BusinessRuleError):
        # Re-raise known errors
        raise
    except Exception as e:
        logger.error(f"Error converting document to PDF: {str(e)}")
        raise BusinessRuleError(f"Failed to convert document to PDF: {str(e)}") from e
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| user | DocUser | - | positional_or_keyword |
| document_uid | str | - | positional_or_keyword |
| version_uid | Optional[str] | None | positional_or_keyword |
| include_signatures | bool | True | positional_or_keyword |
| convert_to_pdfa | bool | True | positional_or_keyword |
| add_watermark | bool | False | positional_or_keyword |
Parameter Details
user: DocUser object representing the authenticated user performing the conversion. Used for permission checks, audit logging, and FileCloud operations.
document_uid: Unique identifier (string) of the controlled document to convert. Must exist in the system or ResourceNotFoundError is raised.
version_uid: Optional unique identifier (string) of a specific document version to convert. If None, the current/latest version of the document is used. Must belong to the specified document.
include_signatures: Boolean flag (default True) indicating whether to include signature images in the generated audit page of the PDF. Useful for compliance documentation.
convert_to_pdfa: Boolean flag (default True) indicating whether to convert the output to PDF/A format (archival standard). Uses compliance level '2b' for long-term preservation.
add_watermark: Boolean flag (default False) indicating whether to add a watermark image to the document. Uses the system logo from settings.LOGO_PATH if available.
Return Value
Type: Dict[str, Any]
Returns a dictionary with conversion results. On success: {'success': True, 'message': str, 'document_uid': str, 'version_uid': str, 'version_number': int, 'pdf_path': str}. If PDF already exists: {'success': True, 'message': 'PDF version already exists', 'document_uid': str, 'version_uid': str, 'pdf_path': str}. The pdf_path is the FileCloud storage path. Raises ResourceNotFoundError if document/version not found, BusinessRuleError for conversion failures, ValidationError for invalid inputs, or PermissionError if user lacks CONVERT_DOCUMENT permission.
Dependencies
logging, uuid, os, tempfile, typing, datetime, io, panel, shutil, traceback, json, re, random, CDocs, document_auditor
Required Imports
import logging
import os
import tempfile
import shutil
import json
from typing import Dict, Any, Optional
from CDocs.models.document import ControlledDocument, DocumentVersion
from CDocs.models.user_extensions import DocUser
from CDocs.utils import audit_trail
from CDocs.config import settings
from CDocs.controllers import ResourceNotFoundError, ValidationError, PermissionError, BusinessRuleError
from CDocs.controllers.filecloud_controller import get_filecloud_document_path, upload_document_to_filecloud, get_filecloud_client
Conditional/Optional Imports
These imports are only needed under specific conditions:
from document_auditor.src.document_processor import DocumentProcessor
Condition: Required for PDF conversion with audit trail features. Imported inside the function after temporary files are prepared.
Required (conditional)
from CDocs.controllers.filecloud_controller import get_filecloud_document_path
Condition: Imported inside the function to get FileCloud paths for document storage.
Required (conditional)
Usage Example
from CDocs.models.user_extensions import DocUser
from CDocs.controllers import ResourceNotFoundError, BusinessRuleError
from CDocs.controllers.document_controller import convert_document_to_pdf

# Get authenticated user
user = DocUser(uid='user123')

# Convert current version to PDF with all features.
# Failures are raised as exceptions, so wrap the call in try/except.
try:
    result = convert_document_to_pdf(
        user=user,
        document_uid='doc-12345',
        version_uid=None,  # Use current version
        include_signatures=True,
        convert_to_pdfa=True,
        add_watermark=True
    )
except (ResourceNotFoundError, BusinessRuleError) as exc:
    print(f"Conversion failed: {exc}")
else:
    if result['success']:
        print(f"PDF created at: {result['pdf_path']}")
        # 'version_number' is not part of the result when the PDF already
        # existed and was returned without reprocessing.
        if 'version_number' in result:
            print(f"Version: {result['version_number']}")
    else:
        print(f"Conversion failed: {result['message']}")

# Convert specific version without watermark
result = convert_document_to_pdf(
    user=user,
    document_uid='doc-12345',
    version_uid='version-67890',
    include_signatures=True,
    convert_to_pdfa=True,
    add_watermark=False
)
Best Practices
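- Failures are signalled by exceptions (ResourceNotFoundError, BusinessRuleError), so wrap calls in try/except; still check the 'success' key of the returned dictionary before accessing other fields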
- The function automatically cleans up temporary files in a finally block, but ensure proper exception handling in calling code
- If a PDF already exists for a version, the function returns early without reprocessing - check the message field, and note that this early result does not contain a 'version_number' key
- The function requires CONVERT_DOCUMENT permission via decorator - ensure user has appropriate permissions
- Temporary directory creation and cleanup is handled internally, but disk space should be available for document processing
- The function logs all conversion events to audit trail - ensure audit logging is properly configured
- For large documents, conversion may take significant time - consider implementing timeout handling in production
- The function uses prepare_audit_data_for_document_processor which must be defined in the same module or imported
- PDF/A conversion with compliance level '2b' is used for archival - this may increase file size
- Watermark feature requires settings.LOGO_PATH to point to a valid image file
- The function updates the DocumentVersion object with pdf_file_path - ensure database transactions are properly managed
- FileCloud upload failures will raise BusinessRuleError - implement retry logic if needed for production environments
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
-
function convert_document_to_pdf_v1 92.0% similar
-
function download_document_version 72.0% similar
-
function download_document_version_v1 71.2% similar
-
function upload_document_to_filecloud 70.3% similar
-
function create_document_v1 68.6% similar