class ContractDataExtractor
Extract structured data from legal contracts using LLM analysis
/tf/active/vicechatdev/contract_validity_analyzer/extractor.py
43 - 691
moderate
Purpose
Extract structured data from legal contracts using LLM analysis
Source Code
class ContractDataExtractor:
    """Extract structured data from legal contracts using LLM analysis.

    ``process_contracts`` drives the pipeline: connect to FileCloud, search a
    path, download each document to a temporary file, extract its text, ask an
    OpenAI chat model for a JSON description of the contract and enrich that
    description with FileCloud metadata. ``save_to_csv``, ``save_to_json`` and
    ``save_to_excel`` persist the resulting list of dictionaries.
    """

    # (CSV header, key in the extracted-contract dict, value used when the key is missing)
    _CSV_COLUMNS = (
        ('Vendor name', 'vendor_name', ''),
        ('ViceBio Contracting Party', 'vicebio_contracting_party', ''),
        ('ViceBio Issuing Country (LE)', 'vicebio_issuing_country', ''),
        ('Address', 'address', ''),
        ('Contract type', 'contract_type', ''),
        ('Contract name/ name of PDF', 'contract_name', ''),
        ('Service / scope of Work', 'service_scope_of_work', ''),
        ('Effective date', 'effective_date', ''),
        ('End Date [automatic renewal?]', 'end_date_auto_renewal', ''),
        ('Contract Status active/ expired', 'contract_status', ''),
        ('Estimated amount original Currency', 'estimated_amount_original_currency', ''),
        ('CURR', 'currency', ''),
        ('Term and Termination', 'term_and_termination', ''),
        ('IP/Exclusivity', 'ip_exclusivity', ''),
        ('Assignment options', 'assignment_options', ''),
        ('Forecast & Financial Provision', 'forecast_financial_provision', ''),
        ('Remarks/ observations', 'remarks_observations', ''),
        ('Assets/Product', 'assets_product', ''),
        ('Parent Contract ID', 'parent_contract_id', ''),
        ('Type of Vendor', 'type_of_vendor', ''),
        ('Criticality/ Priority', 'criticality_priority', ''),
        ('Risks', 'risks', ''),
        ('Contract downloaded YES/NO', 'contract_downloaded', 'YES'),
        ('Link to PDF in Teams', 'link_to_pdf', ''),
        ('Source File', 'source_file', ''),
        ('Processing Date', 'processing_date', ''),
    )

    def __init__(self, config: "Config", limit: Optional[int] = None):
        """
        Initialize the contract data extractor

        Args:
            config: Configuration object
            limit: Optional limit on number of documents to process

        Raises:
            RuntimeError: If the OpenAI library is not available.
            ValueError: If the OPENAI_API_KEY environment variable is not set.
        """
        self.config = config
        self.limit = limit
        self.logger = get_logger(__name__)

        # Initialize OpenAI client
        if not OPENAI_AVAILABLE:
            raise RuntimeError("OpenAI library is required for LLM extraction")
        api_key = os.getenv('OPENAI_API_KEY')
        if not api_key:
            raise ValueError("OPENAI_API_KEY environment variable not set")
        self.openai_client = OpenAI(api_key=api_key)
        self.logger.info("OpenAI client initialized")

        # Initialize FileCloud client
        self.fc_client = FileCloudClient(config.get_section('filecloud'))

        # Initialize document processor
        self.doc_processor = DocumentProcessor(config)

        # Define CSV field mapping based on contracts.csv
        # (these names are also the column headers written by save_to_excel)
        self.csv_fields = [
            'filecloud_filename',
            'filecloud_creation_date',
            'contract_summary',
            'function_research_clinops_etc',
            'work_spend_category',
            'vicebio_owner',
            'sny_bus_owner',
            'sny_pro_poc',
            'contract_strategy',
            'comments_i',
            'assets_product',
            'type_of_vendor',
            'vicebio_contracting_party',
            'vicebio_issuing_country',
            'sny_absorbing_le',
            'vendor_name',
            'vendor_id',
            'address',
            'contract_type',
            'parent_contract_id',
            'contract_id',
            'link_to_pdf',
            'contract_downloaded',
            'contract_name',
            'service_scope_of_work',
            'estimated_amount_original_currency',
            'currency',
            'estimated_amount_euro',
            'effective_date',
            'end_date_auto_renewal',
            'contract_status',
            'contract_duration',
            'criticality_priority',
            'comments_ii',
            'term_and_termination',
            'ip_exclusivity',
            'change_control',
            'assignment_options',
            'comments_iii',
            'forecast_financial_provision',
            'remarks_observations',
            'risks',
            'vicebio_spend_2024',
            'vicebio_spend_2025',
            'supplier_known_sanofi',
            'sanofi_2024_spend',
            'remarks'
        ]
        self.extracted_contracts = []

    @staticmethod
    def _cell_text(value: Any) -> str:
        """Render one extracted value as text for a CSV or Excel cell.

        Lists and dicts are written as JSON, ``None`` as an empty string and
        everything else through ``str``.
        """
        if isinstance(value, (list, dict)):
            return json.dumps(value)
        return '' if value is None else str(value)

    @staticmethod
    def _extract_json_text(response_text: str) -> str:
        """Return the part of an LLM reply most likely to be the JSON object.

        Prefers the content of a ``` / ```json code fence; otherwise falls back
        to the span from the first ``{`` to the last ``}`` so that replies with
        surrounding prose still parse. If neither is found the text is
        returned unchanged.
        """
        fenced = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', response_text, re.DOTALL)
        if fenced:
            return fenced.group(1)
        start = response_text.find('{')
        end = response_text.rfind('}')
        if 0 <= start < end:
            return response_text[start:end + 1]
        return response_text

    def _remove_temp_file(self, path: Optional[str]) -> None:
        """Best-effort removal of a temporary download; never raises."""
        if not path:
            return
        try:
            os.remove(path)
        except FileNotFoundError:
            pass  # already gone - nothing to clean up
        except OSError as e:
            self.logger.warning(f"Could not remove temporary file {path}: {e}")

    def connect_filecloud(self) -> bool:
        """Connect to FileCloud server

        Returns:
            True if the FileCloud client reports a successful connection,
            False otherwise.
        """
        self.logger.info("Connecting to FileCloud...")
        if self.fc_client.connect():
            self.logger.info("✅ Connected to FileCloud successfully")
            return True
        self.logger.error("❌ Failed to connect to FileCloud")
        return False

    def search_contracts(self, path: str) -> List[Dict[str, Any]]:
        """
        Search for contract documents in specified FileCloud path

        Args:
            path: FileCloud path to search

        Returns:
            List of document metadata dictionaries, truncated to ``self.limit``
            entries when a limit is set (a limit of None or 0 means no limit).
        """
        self.logger.info(f"Searching for contracts in: {path}")

        # Search for documents in the specified path; a falsy result
        # (e.g. None) is treated as "nothing found" instead of crashing on len()
        documents = self.fc_client.search_documents(path=path) or []

        if self.limit and len(documents) > self.limit:
            self.logger.info(f"Limiting results to {self.limit} documents (found {len(documents)})")
            documents = documents[:self.limit]

        self.logger.info(f"Found {len(documents)} contract documents to process")
        return documents

    def download_contract(self, document: Dict[str, Any]) -> Optional[str]:
        """
        Download contract document from FileCloud

        The content is written to a temporary file that is NOT deleted
        automatically; the caller owns the returned path and must remove it.

        Args:
            document: Document metadata dictionary (reads 'filename' and 'full_path')

        Returns:
            Local file path if successful, None otherwise
        """
        local_path = None
        try:
            filename = document.get('filename', 'unknown')
            full_path = document.get('full_path', '')
            if not full_path:
                self.logger.error(f"No full_path found in document: {filename}")
                return None

            self.logger.info(f"Downloading: {filename}")

            # Download document content
            content = self.fc_client.download_document(full_path)
            if not content:
                self.logger.error(f"❌ Failed to download: {filename}")
                return None

            # Save to temporary file, keeping the extension so that the
            # text extractor can pick the right handler
            import tempfile
            suffix = os.path.splitext(filename)[1]
            with tempfile.NamedTemporaryFile(mode='wb', suffix=suffix, delete=False) as tmp_file:
                # Record the path before writing so a failed write can be cleaned up
                local_path = tmp_file.name
                tmp_file.write(content)

            self.logger.info(f"✅ Downloaded to: {local_path}")
            return local_path
        except Exception as e:
            self.logger.error(f"Error downloading document: {e}")
            # Do not leave a partially written temp file behind
            self._remove_temp_file(local_path)
            return None

    def extract_text_from_contract(self, file_path: str) -> Optional[str]:
        """
        Extract text content from contract document

        Args:
            file_path: Local path to document file

        Returns:
            Text content if successful, None otherwise
        """
        try:
            self.logger.info(f"Extracting text from: {os.path.basename(file_path)}")

            # For PDFs, use OCR extraction for better accuracy (especially for scanned/signed docs)
            _, ext = os.path.splitext(file_path)
            if ext.lower() == '.pdf':
                self.logger.info("Using OCR extraction for PDF")
                result = self.doc_processor.process_document_with_ocr(file_path)
            else:
                # Use regular extraction for other formats
                result = self.doc_processor.process_document(file_path)

            if result and result.get('success'):
                text_content = result.get('text', '')
                self.logger.info(f"✅ Extracted {len(text_content)} characters")
                return text_content

            error = result.get('error', 'Unknown error') if result else 'No result'
            self.logger.error(f"❌ Failed to extract text: {error}")
            return None
        except Exception as e:
            self.logger.error(f"Error extracting text: {e}")
            return None

    def extract_contract_data_with_llm(self, text_content: str, filename: str, filecloud_path: str = "") -> Dict[str, Any]:
        """
        Extract structured contract data using LLM analysis

        Args:
            text_content: Full text content of the contract
            filename: Name of the contract file
            filecloud_path: FileCloud path (may contain vendor name hint)

        Returns:
            Dictionary of extracted contract data; an empty dictionary if the
            API call fails or the reply is not a JSON object.
        """
        self.logger.info(f"Analyzing contract with LLM: {filename}")

        # Build comprehensive extraction prompt
        prompt = self._build_extraction_prompt(text_content, filename, filecloud_path)

        response_text = ""
        try:
            # Call OpenAI API with structured extraction
            response = self.openai_client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {
                        "role": "system",
                        "content": """You are an expert legal contract analyst specializing in extracting structured data from legal agreements.
Your task is to carefully read contract documents and extract specific information with high accuracy.
Pay special attention to dates, parties, financial terms, and contractual obligations.
For dates, calculate end dates based on contract terms (e.g., "5 years from effective date").
Return data in valid JSON format."""
                    },
                    {
                        "role": "user",
                        "content": prompt
                    }
                ],
                temperature=0.1,  # Low temperature for consistent, accurate extraction
                max_tokens=8000  # Increased for comprehensive contract analysis
            )

            # Parse response (the message content can be None, e.g. on a refusal)
            response_text = (response.choices[0].message.content or "").strip()

            # Extract JSON from response (handle code blocks and surrounding prose)
            json_str = self._extract_json_text(response_text)

            # Parse JSON
            extracted_data = json.loads(json_str)

            # Callers add keys to the result, so anything but an object is a failure
            if not isinstance(extracted_data, dict):
                self.logger.error(
                    f"❌ LLM response is not a JSON object (got {type(extracted_data).__name__})"
                )
                return {}

            self.logger.info("✅ Successfully extracted contract data")
            return extracted_data
        except json.JSONDecodeError as e:
            self.logger.error(f"❌ Failed to parse LLM response as JSON: {e}")
            self.logger.error(f"Response text: {response_text[:500]}...")
            return {}
        except Exception as e:
            self.logger.error(f"❌ Error during LLM extraction: {e}")
            return {}

    def _build_extraction_prompt(self, text_content: str, filename: str, filecloud_path: str = "",
                                 max_chars: int = 15000) -> str:
        """
        Build detailed extraction prompt for LLM

        Args:
            text_content: Contract text content
            filename: Contract filename
            filecloud_path: FileCloud path (may contain vendor name)
            max_chars: Maximum number of contract characters to embed; longer
                texts keep their first and last ``max_chars // 2`` characters

        Returns:
            Formatted prompt string
        """
        # Truncate text if too long (keep first and last portions)
        if len(text_content) > max_chars:
            half = max(max_chars, 0) // 2
            head = text_content[:half]
            # text[-0:] would be the whole text, so guard the zero case
            tail = text_content[-half:] if half > 0 else ""
            text_content = head + "\n\n[... MIDDLE SECTION TRUNCATED ...]\n\n" + tail

        # Extract potential vendor name from path: the path segment that
        # directly follows the first segment containing "Third Parties"
        vendor_hint = ""
        if filecloud_path:
            parts = filecloud_path.split('/')
            for folder, child in zip(parts, parts[1:]):
                if 'Third Parties' in folder:
                    if child.strip():  # skip an empty segment (e.g. trailing slash)
                        vendor_hint = f"\n**VENDOR NAME HINT (from folder structure):** {child}"
                    break

        prompt = f"""Extract structured data from the following legal contract document.
**CONTRACT FILENAME:** {filename}{vendor_hint}
**CONTRACT TEXT:**
{text_content}
---
**EXTRACTION INSTRUCTIONS:**
Extract the following information from the contract. If information is not found, use null or "Not specified".
**CRITICAL: PARTY IDENTIFICATION**
This is a contract BETWEEN two parties. You must identify which is ViceBio and which is the vendor/third party.
- **ViceBio Contracting Party**: This is the ViceBio entity signing the contract (e.g., "Vicebio Limited", "Vicebio Australia Pty Ltd")
- ViceBio is the CUSTOMER/CLIENT/BUYER in the contract
- Look for ViceBio entity in the signature block, party definitions, or header
- **Vendor/Third Party**: This is the OTHER party - the external company providing services/products TO ViceBio
- The vendor is NOT ViceBio - it's the external company (CRO, lab, supplier, consultant, service provider)
- Extract the vendor's full legal name with registration details if available
- Use the vendor name hint from the folder structure if parties are unclear in the document
**EXAMPLES OF PARTY IDENTIFICATION:**
- Purchase Order for lab supplies: Vendor = Lab supplier (e.g., "Promega Benelux B.V"), ViceBio = Buyer
- Service Agreement for testing: Vendor = Testing lab (e.g., "360Biolabs Pty Ltd"), ViceBio = Client
- CRO Agreement: Vendor = CRO company, ViceBio = Sponsor
- Consulting Agreement: Vendor = Consultant/firm, ViceBio = Client
**CRITICAL: DATE EXTRACTION RULES**
1. **Effective Date**: Find the contract start date (look for "Effective Date", "Commencement Date", "Date of Agreement", signature dates)
2. **End Date Calculation**:
- If explicit end date is stated, use it
- If contract states duration (e.g., "5 years", "36 months"), CALCULATE the end date by adding duration to effective date
- If "evergreen" or "perpetual", use "Evergreen" or "Perpetual"
- If automatic renewal clause exists, note: "End Date + [renewal terms]"
- Format all dates as DD/MM/YYYY or MM/DD/YYYY (preserve original format)
- Example: Effective date 10/03/2023 + 2 years = End date 09/03/2025
3. **Contract Status**: Based on dates:
- Compare end date to current date ({datetime.now().strftime('%d/%m/%Y')})
- If end date has passed: "expired"
- If end date is future: "active"
- If evergreen/perpetual: "active"
**FIELDS TO EXTRACT:**
Return JSON with the following structure:
{{
"contract_summary": "A concise 1-paragraph summary (3-5 sentences) describing: what this contract is for, who the parties are, key terms (dates, amounts, scope), and main obligations",
"vendor_name": "Full legal name of the THIRD PARTY vendor/contractor (NOT ViceBio - this is the external company providing services/products)",
"vicebio_contracting_party": "ViceBio entity signing the contract (e.g., 'Vicebio Limited', 'Vicebio Australia Pty Ltd')",
"vicebio_issuing_country": "Country of ViceBio entity (e.g., 'England', 'Australia', 'Belgium')",
"address": "Full address of the vendor/contractor (the third party, NOT ViceBio)",
"contract_type": "Type of agreement (e.g., 'MSA', 'CTA', 'Purchase Order', 'License Agreement', 'Service Agreement')",
"contract_name": "Full name/title of the contract",
"service_scope_of_work": "Brief description of services or scope",
"effective_date": "Contract start date in DD/MM/YYYY or MM/DD/YYYY format",
"contract_duration": "Duration stated (e.g., '5 years', '36 months', 'evergreen')",
"end_date_auto_renewal": "Calculated or stated end date in DD/MM/YYYY format, note if auto-renewal",
"contract_status": "active or expired (calculate based on end date)",
"estimated_amount_original_currency": "Total contract value as number (no currency symbol)",
"currency": "Currency code (e.g., 'USD', 'EUR', 'AUD', 'GBP')",
"term_and_termination": "Summary of termination clauses (notice periods, early termination rights)",
"ip_exclusivity": "Summary of IP ownership and exclusivity terms",
"assignment_options": "Summary of assignment and transfer rights",
"forecast_financial_provision": "Payment terms, fee structure, pricing schemes",
"remarks_observations": "Any other notable terms or observations",
"assets_product": "Products or assets covered (if applicable)",
"parent_contract_id": "Reference to parent/master agreement (if this is an amendment/work order)",
"type_of_vendor": "Classification (e.g., 'Major Service Providers', 'Supplier', 'CRO')",
"criticality_priority": "Assess importance: 'high', 'medium', or 'low'",
"risks": "Risk assessment: 'H' (high), 'M' (medium), 'L' (low), or 'NO' if no risks"
}}
**IMPORTANT REMINDERS:**
- **VENDOR vs VICEBIO**: The vendor is ALWAYS the external third party (CRO, supplier, lab, consultant), NEVER ViceBio
- In a Purchase Order: The vendor is who ViceBio is buying FROM (the supplier)
- In a Service Agreement: The vendor is who is providing services TO ViceBio
- ViceBio is always the customer/client receiving goods or services
- Check the vendor name hint from the folder structure if parties are unclear in the document
- **ADDRESS**: Extract the address of the VENDOR/third party, NOT ViceBio's address
- Look for supplier address, vendor address, service provider address
- If document only shows ViceBio's delivery address, state "Not specified in document"
- Extract dates exactly as they appear, but CALCULATE end dates if only duration is given
- For amounts, extract only the number (e.g., extract "50000" from "$50,000" or "50K")
- Be precise with legal entity names (full legal names with ABN/ACN if present)
- Look for renewal clauses and note in end_date_auto_renewal field
- If contract states "upon completion of work", note this in end_date_auto_renewal
- Cross-reference signature pages for effective dates if not stated in main text
Return ONLY the JSON object, no additional text."""
        return prompt

    def _process_single_contract(self, document: Dict[str, Any], filename: str) -> Optional[Dict[str, Any]]:
        """Download, read and analyze one document; return its data or None.

        The temporary download is removed on every exit path, including
        failures and exceptions.
        """
        # Download contract
        local_path = self.download_contract(document)
        if not local_path:
            self.logger.error(f"Skipping {filename}: Download failed")
            return None

        try:
            # Extract text
            text_content = self.extract_text_from_contract(local_path)
            if not text_content:
                self.logger.error(f"Skipping {filename}: Text extraction failed")
                return None

            # Extract structured data with LLM (pass FileCloud path for vendor hint)
            filecloud_path = document.get('full_path', document.get('path', ''))
            contract_data = self.extract_contract_data_with_llm(text_content, filename, filecloud_path)
            if not contract_data:
                self.logger.error(f"❌ Failed to extract data from: {filename}")
                return None

            # Generate full FileCloud URL
            full_path = document.get('full_path', '')
            filecloud_url = self.fc_client.get_document_url(full_path) if full_path else ''

            # Get file metadata for creation date (FileCloud uses 'modifiediso')
            file_info = self.fc_client.get_file_info(full_path) if full_path else None
            creation_date = file_info.get('modifiediso', '') if file_info else ''

            # Add metadata
            contract_data['filecloud_filename'] = filename
            contract_data['filecloud_creation_date'] = creation_date
            contract_data['source_file'] = filename
            contract_data['filecloud_path'] = document.get('path', '')
            contract_data['processing_date'] = datetime.now().isoformat()
            contract_data['contract_downloaded'] = 'YES'
            contract_data['link_to_pdf'] = filecloud_url
            return contract_data
        finally:
            # Clean up downloaded file
            self._remove_temp_file(local_path)

    def process_contracts(self, path: str) -> List[Dict[str, Any]]:
        """
        Main processing pipeline: search, download, extract, and analyze contracts

        A failure on one document is logged and does not stop the run. The
        FileCloud connection is closed on every exit path once it was opened.

        Args:
            path: FileCloud path to search for contracts

        Returns:
            List of extracted contract data dictionaries
        """
        # Connect to FileCloud
        if not self.connect_filecloud():
            return []

        results = []
        try:
            # Search for contracts
            documents = self.search_contracts(path)
            if not documents:
                self.logger.warning("No documents found to process")
                return []

            # Process each contract
            for i, document in enumerate(documents, 1):
                self.logger.info(f"\n{'='*80}")
                self.logger.info(f"Processing contract {i}/{len(documents)}")
                self.logger.info(f"{'='*80}")

                filename = document.get('filename', 'unknown')
                try:
                    contract_data = self._process_single_contract(document, filename)
                except Exception as e:
                    self.logger.error(f"❌ Error processing {filename}: {e}")
                    continue

                if contract_data:
                    results.append(contract_data)
                    self.logger.info(f"✅ Successfully processed: {filename}")
        finally:
            # Disconnect from FileCloud
            self.fc_client.disconnect()

        self.logger.info(f"\n{'='*80}")
        self.logger.info(f"Processing complete: {len(results)}/{len(documents)} contracts successfully extracted")
        self.logger.info(f"{'='*80}\n")
        return results

    def save_to_csv(self, results: List[Dict[str, Any]], output_path: str) -> None:
        """
        Save extracted contract data to CSV file

        Only the fields listed in ``_CSV_COLUMNS`` are written. Columns are
        emitted in alphabetical order of their header. Errors are logged, not
        raised.

        Args:
            results: List of extracted contract data dictionaries
            output_path: Path to output CSV file
        """
        if not results:
            self.logger.warning("No results to save")
            return

        self.logger.info(f"Saving {len(results)} contracts to CSV: {output_path}")
        try:
            # Map extracted data to CSV fields (simplified mapping - expand as needed)
            csv_rows = [
                {
                    header: self._cell_text(contract.get(key, default))
                    for header, key, default in self._CSV_COLUMNS
                }
                for contract in results
            ]

            # Every row has the same keys, so the header is the sorted column list
            fieldnames = sorted(header for header, _, _ in self._CSV_COLUMNS)

            # Write to CSV
            with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()
                writer.writerows(csv_rows)

            self.logger.info(f"✅ Successfully saved to: {output_path}")
        except Exception as e:
            self.logger.error(f"❌ Error saving to CSV: {e}")

    def save_to_json(self, results: List[Dict[str, Any]], output_path: str) -> None:
        """
        Save extracted contract data to JSON file

        Errors are logged, not raised.

        Args:
            results: List of extracted contract data dictionaries
            output_path: Path to output JSON file
        """
        if not results:
            self.logger.warning("No results to save")
            return

        self.logger.info(f"Saving {len(results)} contracts to JSON: {output_path}")
        try:
            with open(output_path, 'w', encoding='utf-8') as jsonfile:
                json.dump(results, jsonfile, indent=2, ensure_ascii=False)
            self.logger.info(f"✅ Successfully saved to: {output_path}")
        except Exception as e:
            self.logger.error(f"❌ Error saving to JSON: {e}")

    def save_to_excel(self, results: List[Dict[str, Any]], output_path: str) -> None:
        """
        Save extracted contract data to Excel file with formatting

        One column is written per entry of ``self.csv_fields``, with a styled,
        frozen header row. Errors are logged, not raised.

        Args:
            results: List of extracted contract data dictionaries
            output_path: Path to output Excel file
        """
        if not results:
            self.logger.warning("No results to save")
            return
        if not OPENPYXL_AVAILABLE:
            self.logger.error("openpyxl not available - cannot save to Excel")
            return

        self.logger.info(f"Saving {len(results)} contracts to Excel: {output_path}")
        try:
            # Create workbook and worksheet
            wb = openpyxl.Workbook()
            ws = wb.active
            ws.title = "Contracts"

            # Define header style
            header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
            header_font = Font(bold=True, color="FFFFFF", size=11)
            header_alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
            body_alignment = Alignment(vertical="top", wrap_text=True)

            # Define border style
            thin = Side(style='thin')
            thin_border = Border(left=thin, right=thin, top=thin, bottom=thin)

            # Get fieldnames
            fieldnames = self.csv_fields

            # Write headers
            for col_num, field in enumerate(fieldnames, 1):
                cell = ws.cell(row=1, column=col_num)
                cell.value = field
                cell.fill = header_fill
                cell.font = header_font
                cell.alignment = header_alignment
                cell.border = thin_border

            # Write data rows (lists/dicts as JSON, None as empty string)
            for row_num, result in enumerate(results, 2):
                for col_num, field in enumerate(fieldnames, 1):
                    cell = ws.cell(row=row_num, column=col_num)
                    cell.value = self._cell_text(result.get(field, ''))
                    cell.border = thin_border
                    cell.alignment = body_alignment

            # Auto-size columns (approximate)
            last_row = len(results) + 1
            for col_num, field in enumerate(fieldnames, 1):
                # Get max length in column
                max_length = len(field)
                for row_num in range(2, last_row + 1):
                    cell_value = ws.cell(row=row_num, column=col_num).value
                    if cell_value:
                        # Cap at 100 chars for width calc
                        max_length = max(max_length, len(str(cell_value)[:100]))
                # Set column width (with max limit)
                adjusted_width = min(max_length + 2, 50)
                ws.column_dimensions[openpyxl.utils.get_column_letter(col_num)].width = adjusted_width

            # Freeze the header row
            ws.freeze_panes = "A2"

            # Save workbook
            wb.save(output_path)
            self.logger.info(f"✅ Successfully saved to: {output_path}")
        except Exception as e:
            self.logger.error(f"❌ Error saving to Excel: {e}")
            import traceback
            traceback.print_exc()
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
bases |
- | - |
Parameter Details
bases: Base classes of the class. None are declared, so the class derives only from `object`.
Return Value
Instantiating the class returns a `ContractDataExtractor` instance.
Class Interface
Methods
__init__(self, config, limit)
Purpose: Initialize the contract data extractor Args: config: Configuration object limit: Optional limit on number of documents to process
Parameters:
config: Type: Config
limit: Type: Optional[int]
Returns: None
connect_filecloud(self) -> bool
Purpose: Connect to FileCloud server
Returns: bool
search_contracts(self, path) -> List[Dict[str, Any]]
Purpose: Search for contract documents in specified FileCloud path Args: path: FileCloud path to search Returns: List of document metadata dictionaries
Parameters:
path: Type: str
Returns: List[Dict[str, Any]]
download_contract(self, document) -> Optional[str]
Purpose: Download contract document from FileCloud Args: document: Document metadata dictionary Returns: Local file path if successful, None otherwise
Parameters:
document: Type: Dict[str, Any]
Returns: Optional[str]
extract_text_from_contract(self, file_path) -> Optional[str]
Purpose: Extract text content from contract document Args: file_path: Local path to document file Returns: Text content if successful, None otherwise
Parameters:
file_path: Type: str
Returns: Optional[str]
extract_contract_data_with_llm(self, text_content, filename, filecloud_path) -> Dict[str, Any]
Purpose: Extract structured contract data using LLM analysis Args: text_content: Full text content of the contract filename: Name of the contract file filecloud_path: FileCloud path (may contain vendor name hint) Returns: Dictionary of extracted contract data
Parameters:
text_content: Type: str
filename: Type: str
filecloud_path: Type: str
Returns: Dict[str, Any]
_build_extraction_prompt(self, text_content, filename, filecloud_path) -> str
Purpose: Build detailed extraction prompt for LLM Args: text_content: Contract text content filename: Contract filename filecloud_path: FileCloud path (may contain vendor name) Returns: Formatted prompt string
Parameters:
text_content: Type: str
filename: Type: str
filecloud_path: Type: str
Returns: str
process_contracts(self, path) -> List[Dict[str, Any]]
Purpose: Main processing pipeline: search, download, extract, and analyze contracts Args: path: FileCloud path to search for contracts Returns: List of extracted contract data dictionaries
Parameters:
path: Type: str
Returns: List[Dict[str, Any]]
save_to_csv(self, results, output_path) -> None
Purpose: Save extracted contract data to CSV file Args: results: List of extracted contract data dictionaries output_path: Path to output CSV file
Parameters:
results: Type: List[Dict[str, Any]]
output_path: Type: str
Returns: None
save_to_json(self, results, output_path) -> None
Purpose: Save extracted contract data to JSON file Args: results: List of extracted contract data dictionaries output_path: Path to output JSON file
Parameters:
results: Type: List[Dict[str, Any]]
output_path: Type: str
Returns: None
save_to_excel(self, results, output_path) -> None
Purpose: Save extracted contract data to Excel file with formatting Args: results: List of extracted contract data dictionaries output_path: Path to output Excel file
Parameters:
results: Type: List[Dict[str, Any]]
output_path: Type: str
Returns: None
Required Imports
import os
import sys
import json
import argparse
import csv
Usage Example
# Example usage:
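# extractor = ContractDataExtractor(config, limit=10)
# results = extractor.process_contracts("/path/in/filecloud")
# extractor.save_to_csv(results, "contracts.csv")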
Similar Components
AI-powered semantic similarity - components with related functionality:
-
function test_llm_extraction 75.4% similar
-
function test_llm_client 63.2% similar
-
function main 62.9% similar
-
class RegulatoryExtractor 60.9% similar
-
function test_new_fields 57.6% similar