function create_word_report
Generates a formatted Microsoft Word document report containing warranty disclosures with a table of contents, metadata, and structured sections for each warranty.
/tf/active/vicechatdev/convert_disclosures_to_table.py
233 - 371
complex
Purpose
This function creates a comprehensive Word document report for Project Victoria warranty disclosures. It processes a list of warranty dictionaries and formats them into a professional document with proper heading hierarchy, a table of contents, metadata (generation date, counts), and detailed sections for each warranty including warranty text and disclosure content. The function intelligently parses disclosure content to detect and apply appropriate heading styles based on markdown syntax, numbering patterns, and formatting conventions.
Source Code
# Enumerated heading prefixes: "IV. ", "A. ", "12. " or "a) ".
_NUMBERED_HEADING_RE = re.compile(r'^(?:[IVX]+\.|[A-Z]\.|\d+\.|[a-z]\))\s')
# Two capitalised words followed by anything, ending in a colon.
_TITLE_COLON_RE = re.compile(r'^[A-Z][a-z]+ [A-Z][a-z]+.*:$')


def _detect_disclosure_heading(line):
    """Classify one stripped, non-empty disclosure line.

    Returns:
        A ``(heading_level, text)`` tuple. ``heading_level`` is ``None`` when
        the line is ordinary body text; otherwise it is the Word heading level
        to use and ``text`` is the heading with its markup removed.
    """
    # Markdown heading. Only the *leading* run of '#' decides the level, so a
    # '#' later in the line (e.g. "# See issue #12") does not inflate it.
    if line.startswith('#'):
        text = line.lstrip('#')
        hashes = len(line) - len(text)
        # +2 because the enclosing "Disclosure" heading is level 2.
        return min(hashes + 2, 6), text.strip()

    # Whole line wrapped in bold markers: **text**
    if line.startswith('**') and line.endswith('**') and len(line) > 4:
        return 3, line[2:-2].strip()

    # Enumerated headings keep their numbering, it carries meaning.
    if _NUMBERED_HEADING_RE.match(line):
        return 3, line

    # Short label ending with a colon, e.g. "Key Findings:".
    if line.endswith(':') and len(line.split()) <= 4 and not line.startswith('-'):
        return 4, line[:-1].strip()

    # Emphasised patterns that read like headings.
    if ((line.isupper() and len(line.split()) <= 5)
            or (line.startswith('- **') and line.endswith('**:'))
            or _TITLE_COLON_RE.match(line)):
        # Drop only a leading bullet; a "- " inside the heading is real text.
        text = line[2:] if line.startswith('- ') else line
        return 4, text.replace('**', '').rstrip(':').strip()

    return None, line


def _add_disclosure_body(doc, disclosure_content):
    """Write disclosure text into *doc*, turning heading-like lines into headings.

    Consecutive body lines are joined with single spaces into one paragraph;
    a blank line or a heading ends the current paragraph.
    """
    pending = []

    def flush():
        if pending:
            doc.add_paragraph(' '.join(pending))
            pending.clear()

    for raw_line in disclosure_content.split('\n'):
        line = raw_line.strip()
        if not line:
            flush()
            continue

        level, text = _detect_disclosure_heading(line)
        if level is None:
            pending.append(line)
            continue

        flush()
        # A marker-only line such as "###" has no text left; skip it rather
        # than emit an empty heading.
        if text:
            doc.add_heading(text, level=level)

    flush()


def create_word_report(warranties, output_file):
    """Create a Word report of warranty disclosures with proper heading styles.

    The document contains a title, generation metadata, a numbered list used
    as a table of contents, and one section per warranty (ordered by
    ``Warranty_ID``) with its warranty text and parsed disclosure content.

    Args:
        warranties: Iterable of dicts. Each must provide the keys
            'Warranty_ID', 'Warranty_Title', 'Section_Name',
            'Source_Documents_Count', 'Warranty_Text' and 'Full_Disclosure'.
        output_file: Destination path handed to ``Document.save``.

    Returns:
        True when the document was saved; False when any exception occurred
        (including a missing key in a warranty dict). Errors are logged, not
        raised.
    """
    logger.info(f"Creating Word report: {output_file}")

    try:
        doc = Document()

        # Document title
        title = doc.add_heading('Project Victoria - Warranty Disclosures', 0)
        title.alignment = 1  # 1 == WD_ALIGN_PARAGRAPH.CENTER

        # Document metadata
        doc.add_paragraph(f"Generated on: {datetime.now().strftime('%B %d, %Y at %H:%M:%S')}")
        doc.add_paragraph(f"Total Warranties Processed: {len(warranties)}")
        doc.add_paragraph(f"Total Disclosures Generated: {len(warranties)}")
        doc.add_page_break()

        # Sort once; the table of contents and the sections share the order.
        ordered = sorted(warranties, key=lambda x: x['Warranty_ID'])

        # Table of contents. Entries are cleaned exactly like the section
        # headings below so both show the same text.
        doc.add_heading('Table of Contents', level=1)
        for warranty in ordered:
            toc_id = clean_text_for_xml(warranty['Warranty_ID'])
            toc_title = clean_text_for_xml(warranty['Warranty_Title'])
            doc.add_paragraph(f"{toc_id} - {toc_title}", style='List Number')
        doc.add_page_break()

        # One section per warranty
        for warranty in ordered:
            warranty_id = clean_text_for_xml(warranty['Warranty_ID'])
            warranty_title = clean_text_for_xml(warranty['Warranty_Title'])
            section_name = clean_text_for_xml(warranty['Section_Name'])
            source_docs_count = clean_text_for_xml(warranty['Source_Documents_Count'])
            warranty_text = clean_text_for_xml(warranty['Warranty_Text'])
            disclosure_content = clean_text_for_xml(warranty['Full_Disclosure'])

            # Main warranty heading (Level 1)
            doc.add_heading(f"{warranty_id} - {warranty_title}", level=1)

            # Section information
            doc.add_paragraph(f"Section: {section_name}", style='Heading 2')
            doc.add_paragraph(f"Source Documents Found: {source_docs_count}")

            # Warranty Text subsection
            doc.add_heading('Warranty Text', level=2)
            doc.add_paragraph(warranty_text, style='Quote')

            # Disclosure subsection
            doc.add_heading('Disclosure', level=2)
            if disclosure_content:
                _add_disclosure_body(doc, disclosure_content)
            else:
                doc.add_paragraph("No disclosure content available.")

            # Separator between warranties
            doc.add_paragraph("_" * 80)
            doc.add_paragraph()  # Empty line

        doc.save(output_file)
        logger.info(f"Created Word report: {output_file}")
        return True

    except ImportError:
        # Kept for backward compatibility; only reachable if an import is
        # triggered lazily somewhere inside the try body.
        logger.warning("python-docx not available, skipping Word export")
        return False
    except Exception as e:
        # Best-effort export: report the failure (with traceback) and signal
        # it through the return value instead of raising.
        logger.error(f"Error creating Word document: {e}", exc_info=True)
        return False
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| warranties | - | - | positional_or_keyword |
| output_file | - | - | positional_or_keyword |
Parameter Details
warranties: A list of dictionaries where each dictionary represents a warranty. Each warranty dictionary must contain the following keys: 'Warranty_ID' (unique identifier), 'Warranty_Title' (title of the warranty), 'Section_Name' (section classification), 'Source_Documents_Count' (number of source documents), 'Warranty_Text' (the actual warranty text), and 'Full_Disclosure' (the disclosure content). The list will be sorted by Warranty_ID for consistent ordering.
output_file: String or Path object specifying the file path where the Word document should be saved. Should include the .docx extension. The directory must exist or be writable.
Return Value
Returns a boolean value: True if the Word document was successfully created and saved, False if an error occurred (such as missing python-docx library or file writing errors). The function logs appropriate messages for success and failure cases.
Dependencies
python-docx, datetime, logging, re
Required Imports
from docx import Document
from datetime import datetime
import logging
import re
Conditional/Optional Imports
These imports are only needed under specific conditions:
from docx import Document
Condition: Required for Word document creation; function returns False with warning if not available
Required (conditional)
from docx.shared import Inches
Condition: Listed in source file imports but not used in this function
Optional
from docx.enum.style import WD_STYLE_TYPE
Condition: Listed in source file imports but not used in this function
Optional
Usage Example
import logging
from datetime import datetime
from docx import Document
import re
# Setup logger
# create_word_report logs through a module-level name `logger`, so that name
# must be bound before the function is called.
logger = logging.getLogger(__name__)
# INFO level so the function's logger.info progress messages are emitted.
logging.basicConfig(level=logging.INFO)
# Define clean_text_for_xml helper function
def clean_text_for_xml(text):
    """Return *text* as a string that is safe to store in a Word document.

    ``None`` becomes an empty string and any other value is converted with
    ``str()``. Characters that XML 1.0 cannot represent (C0 control characters
    other than tab, line feed and carriage return, lone surrogates, U+FFFE and
    U+FFFF) are removed. Markup characters such as ``&``, ``<`` and ``>`` are
    left untouched: the XML serialiser escapes them when the document is
    written, so escaping here would put literal entities in the report.
    """
    if text is None:
        return ''
    return re.sub(
        r'[\x00-\x08\x0b\x0c\x0e-\x1f\ud800-\udfff\ufffe\uffff]',
        '',
        str(text),
    )
# Sample input: one dict per warranty, carrying every key the report reads.
warranties = [
    {
        'Warranty_ID': 'W001',
        'Warranty_Title': 'Product Quality Warranty',
        'Section_Name': 'Quality Assurance',
        'Source_Documents_Count': '5',
        'Warranty_Text': 'All products are warranted to be free from defects.',
        'Full_Disclosure': '## Overview\n\nThis warranty covers manufacturing defects.\n\n**Coverage Period**: 12 months\n\n### Exclusions\n\n- Normal wear and tear\n- Misuse or abuse',
    },
    {
        'Warranty_ID': 'W002',
        'Warranty_Title': 'Service Warranty',
        'Section_Name': 'Service Terms',
        'Source_Documents_Count': '3',
        'Warranty_Text': 'Services are warranted for 90 days.',
        'Full_Disclosure': 'Service warranty details here.',
    },
]

# Build the report; the function signals success through its return value.
output_path = 'warranty_report.docx'
success = create_word_report(warranties, output_path)
print(f'Report created successfully: {output_path}' if success else 'Failed to create report')
Best Practices
- Ensure the 'clean_text_for_xml' function is defined in the module scope before calling this function
- Configure a logger instance named 'logger' at module level for proper logging output
- Validate that all warranty dictionaries contain the required keys before passing to this function
- Ensure the output directory exists and has write permissions before calling
- Install python-docx library (pip install python-docx) before using this function
- The function gracefully handles missing python-docx by returning False with a warning
- Warranty data is automatically sorted by Warranty_ID for consistent document structure
- The disclosure content parser supports multiple heading formats including markdown (#), bold (**text**), numbered lists, and colon-terminated headers
- Consider the file size when processing large numbers of warranties as Word documents can become large
- The function uses try-except blocks to handle errors gracefully and logs detailed error messages
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
-
function create_word_report_improved 95.5% similar
-
function create_enhanced_word_document 85.3% similar
-
function create_enhanced_word_document_v1 81.6% similar
-
function main_v1 75.8% similar
-
function main_v15 74.4% similar