main_v6 - Code Extractor

function main_v6

Maturity: 50

Main execution function that orchestrates the import of controlled documents from FileCloud into a Neo4j database, checking for duplicates and managing document metadata.

File:
/tf/active/vicechatdev/CDocs/FC_sync.py

Lines:
483 - 565

Complexity:
complex

Purpose

This function serves as the entry point for a document import script that searches FileCloud for controlled documents, validates them against existing records in Neo4j (by UID or document number), and imports new documents while tracking success/failure statistics. It handles duplicate detection, error logging, and provides a comprehensive summary of the import operation.

Source Code

def main():
    """Import controlled documents found in FileCloud that are not yet managed.

    Workflow:
      1. Resolve the admin user passed to the import helper; abort if none
         is available.
      2. Search FileCloud for candidate documents; stop if none are found.
      3. For each candidate, skip it when it is already known (matched by
         ``cdoc_uid`` first, falling back to ``doc_number`` only when no UID
         is present) or when it has no ``file_path``; otherwise import it.
      4. Log a summary with the imported / skipped / failed counts.

    A failure while handling a single document is logged and counted as
    failed; the remaining documents are still processed and the summary is
    still emitted.

    Returns:
        None. All outcomes are reported through the module ``logger``.
    """
    try:
        logger.info("Starting FileCloud document import script")

        # Get admin user for document creation
        admin_user = get_admin_user()
        if not admin_user:
            logger.error("Cannot proceed without an admin user")
            return

        # Search for controlled documents in FileCloud
        documents = search_filecloud_for_documents()
        if not documents:
            logger.info("No documents found to import")
            return

        # Process found documents
        imported_count = 0
        skipped_count = 0
        failed_count = 0

        for doc in documents:
            # Each document is handled in its own try block so that one bad
            # entry (or a helper raising) cannot abort the rest of the batch.
            try:
                file_path = doc.get('file_path')
                # ``or {}`` also covers an explicit ``'metadata': None`` entry.
                metadata = doc.get('metadata') or {}
                cdoc_uid = metadata.get('cdoc_uid')
                doc_number = metadata.get('doc_number')

                # First check by cdoc_uid, the preferred identifier.
                if cdoc_uid:
                    logger.info(f"Found document with cdoc_uid: {cdoc_uid}")
                    if check_document_exists_by_uid(cdoc_uid):
                        logger.info(f"Document with UID {cdoc_uid} is already managed in Neo4j - skipping")
                        skipped_count += 1
                        continue

                    # A cdoc_uid that is unknown to the database means the
                    # document was meant to be managed but isn't - import it.
                    logger.info(f"Document with UID {cdoc_uid} not found in Neo4j but has cdoc_uid - will import")

                # Nothing can be imported without a path, regardless of
                # whether a document number is present.
                if not file_path:
                    identifier = doc_number or cdoc_uid or "<unknown>"
                    logger.warning(f"Skipping document with missing file_path: {identifier}")
                    skipped_count += 1
                    continue

                # Fall back to the document number only when there is no UID.
                if doc_number and not cdoc_uid:
                    if check_document_exists_by_doc_number(doc_number):
                        logger.info(f"Document {doc_number} already exists in database by number - skipping")
                        skipped_count += 1
                        continue

                # Import document
                logger.info(f"file path: {file_path}")
                logger.info(f"metadata: {metadata}")
                logger.info(f"admin_user: {admin_user.name}")
                result = import_document_from_filecloud(file_path, metadata, admin_user)

                if result and result.get('success', False):
                    imported_count += 1
                    logger.info(f"Successfully imported document: {result.get('doc_number')}")
                else:
                    failed_count += 1
                    error_msg = result.get('message') if result else "Unknown error"
                    logger.error(f"Failed to import document: {error_msg}")
            except Exception as e:
                failed_count += 1
                # logger.exception records the traceback along with the message.
                logger.exception(f"Failed to process document {doc!r}: {e}")

        # Report summary
        logger.info("===== Import Summary =====")
        logger.info(f"Total documents found in FileCloud: {len(documents)}")
        logger.info(f"Documents imported: {imported_count}")
        logger.info(f"Documents skipped (already exist): {skipped_count}")
        logger.info(f"Documents failed to import: {failed_count}")
        logger.info("=========================")

    except Exception as e:
        # Top-level boundary: log with traceback instead of crashing the script.
        logger.exception(f"Error in main execution: {e}")

Return Value

Returns None. The function performs side effects (importing documents, logging results) rather than returning a value. Success/failure information is logged and tracked internally through counters (imported_count, skipped_count, failed_count).

Dependencies

os
sys
logging
tempfile
uuid
io
typing
datetime
CDocs.db.db_operations
CDocs.models.document
CDocs.models.user_extensions
CDocs.controllers.filecloud_controller
CDocs.controllers.document_controller
CDocs.config
FC_api
metadata_catalog
traceback

Required Imports

import os
import sys
import logging
import tempfile
import uuid
import io
from typing import Dict, List, Any, Optional
from datetime import datetime
from CDocs.db import db_operations as db
from CDocs.models.document import ControlledDocument, DocumentVersion
from CDocs.models.user_extensions import DocUser
from CDocs.controllers.filecloud_controller import get_filecloud_client, upload_document_to_filecloud, get_filecloud_document_path, ensure_document_folders, FileCloudError
from CDocs.controllers.document_controller import create_document_version, set_current_version
from CDocs.config import settings
from FC_api import FileCloudAPI
from metadata_catalog import MetadataCatalog
import traceback

Usage Example

# Set up console logging first so every step of the import is visible
import logging

# Build the output pipeline (formatter -> stream handler) ...
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler = logging.StreamHandler()
handler.setFormatter(formatter)

# ... then attach it to this module's logger at INFO level
logger = logging.getLogger(__name__)
logger.addHandler(handler)
logger.setLevel(logging.INFO)

# main() expects the following helpers to be defined in the same module:
# - get_admin_user()
# - search_filecloud_for_documents()
# - check_document_exists_by_uid(cdoc_uid)
# - check_document_exists_by_doc_number(doc_number)
# - import_document_from_filecloud(file_path, metadata, admin_user)

# Run the import only when executed as a script
if __name__ == '__main__':
    main()
    # The log output will show:
    # - Documents found in FileCloud
    # - Import progress for each document
    # - Final summary with counts of imported/skipped/failed documents

Best Practices

Ensure proper logging configuration before calling this function to capture all import activities
Verify that an admin user exists in the system before running the import
The function performs duplicate checking using both cdoc_uid (preferred) and doc_number (fallback) to prevent duplicate imports
Documents with cdoc_uid metadata take precedence over doc_number for duplicate detection
Monitor the import summary logs to track success rates and identify issues
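Handle FileCloud connection errors gracefully - an import that reports failure through its result dictionary is logged, counted in failed_count, and processing continues with the remaining documents; helper functions should report errors this way rather than raising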
Ensure sufficient disk space and memory for processing large document sets
Run this function in a controlled environment as it performs database writes and file operations
Consider implementing rate limiting if importing large numbers of documents to avoid overwhelming FileCloud API
Review failed_count in the summary to identify and address systematic import issues

Similar Components

AI-powered semantic similarity - components with related functionality:

function import_document_from_filecloud 65.6% similar

Imports a document from FileCloud into the system by extracting metadata, creating a controlled document record, downloading the file content, creating a document version, and uploading it back to FileCloud with proper folder structure.
From: /tf/active/vicechatdev/CDocs/FC_sync.py
function main_v4 62.7% similar

Main entry point function for the Contract Validity Analyzer application that orchestrates configuration loading, logging setup, FileCloud connection, and contract analysis execution.
From: /tf/active/vicechatdev/contract_validity_analyzer/main.py
function main 61.5% similar

Main entry point function for a Legal Contract Data Extractor application that processes contracts from FileCloud, extracts data, and exports results to multiple formats (CSV, Excel, JSON).
From: /tf/active/vicechatdev/contract_validity_analyzer/extractor.py
function create_document 61.0% similar

Creates a new controlled document in a document management system with versioning, audit trails, and optional initial content.
From: /tf/active/vicechatdev/document_controller_backup.py
function create_document_legacy 59.5% similar

Creates a new controlled document in a document management system with versioning, audit trails, and notifications. Generates document nodes in a graph database with relationships to users and versions.
From: /tf/active/vicechatdev/CDocs/controllers/document_controller.py

← Back to Browse

Assistant

Hi! I can help improve this code. Tell me what you'd like to enhance (e.g., "add error handling", "optimize performance", "improve readability", "add type hints").

Code Comparison

Original Code

                            def main():
    """Main execution function.

    Looks up an admin user, searches FileCloud for documents, skips the
    ones already known (by ``cdoc_uid``, or by ``doc_number`` when no UID is
    present), imports the rest and logs a summary of the counts.

    Returns None; every outcome is reported through ``logger``. Any
    exception raised inside the body is caught by the outer handler and
    logged together with its traceback.
    """
    try:
        logger.info("Starting FileCloud document import script")
        
        # Get admin user for document creation
        admin_user = get_admin_user()
        if not admin_user:
            logger.error("Cannot proceed without an admin user")
            return
        
        # Search for controlled documents in FileCloud
        documents = search_filecloud_for_documents()
        if not documents:
            logger.info("No documents found to import")
            return
            
        # Process found documents
        imported_count = 0
        skipped_count = 0
        failed_count = 0
        
        for doc in documents:
            file_path = doc.get('file_path')
            metadata = doc.get('metadata', {})
            
            # First check if document has a cdoc_uid in metadata
            cdoc_uid = metadata.get('cdoc_uid')
            if cdoc_uid:
                logger.info(f"Found document with cdoc_uid: {cdoc_uid}")
                # Check if this document exists in Neo4j by UID
                existing_doc = check_document_exists_by_uid(cdoc_uid)
                if existing_doc:
                    logger.info(f"Document with UID {cdoc_uid} is already managed in Neo4j - skipping")
                    skipped_count += 1
                    continue
                    
                # If we have a cdoc_uid but it's not in Neo4j, this means the document 
                # was meant to be managed but isn't - import it with that ID
                logger.info(f"Document with UID {cdoc_uid} not found in Neo4j but has cdoc_uid - will import")
            
            # Then check by document number as fallback
            doc_number = metadata.get('doc_number')
            # This guard only fires when a doc_number is present; a document
            # with neither doc_number nor file_path falls through.
            if doc_number and not file_path:
                logger.warning(f"Skipping document with missing file_path: {doc_number}")
                skipped_count += 1
                continue
            
            # The number lookup is used only when there is no cdoc_uid.
            if doc_number and not cdoc_uid:
                # Skip if document already exists in database by number
                existing_doc = check_document_exists_by_doc_number(doc_number)
                if existing_doc:
                    logger.info(f"Document {doc_number} already exists in database by number - skipping")
                    skipped_count += 1
                    continue
            
            # Import document
            # NOTE(review): file_path can still be None here (no doc_number and
            # no file_path); the concatenation below would then raise TypeError,
            # which the outer handler catches, ending the loop before the summary.
            logger.info("file path: " + file_path)
            logger.info("metadata: " + str(metadata))
            logger.info("admin_user: " + str(admin_user.name))
            result = import_document_from_filecloud(file_path, metadata, admin_user)
            #result=None
            
            # A falsy result or one without a truthy 'success' counts as failed.
            if result and result.get('success', False):
                imported_count += 1
                logger.info(f"Successfully imported document: {result.get('doc_number')}")
            else:
                failed_count += 1
                error_msg = result.get('message') if result else "Unknown error"
                logger.error(f"Failed to import document: {error_msg}")
        
        # Report summary
        # skipped_count also includes documents skipped for a missing file_path.
        logger.info("===== Import Summary =====")
        logger.info(f"Total documents found in FileCloud: {len(documents)}")
        logger.info(f"Documents imported: {imported_count}")
        logger.info(f"Documents skipped (already exist): {skipped_count}")
        logger.info(f"Documents failed to import: {failed_count}")
        logger.info("=========================")
        
    except Exception as e:
        # Top-level boundary: log the error and its traceback instead of raising.
        logger.error(f"Error in main execution: {e}")
        import traceback
        logger.error(traceback.format_exc())
                        

Improved Code

🔍 Code Extractor

function main_v6

Purpose

Source Code

Return Value

Dependencies

Required Imports

Usage Example

Best Practices

Tags

Similar Components

function import_document_from_filecloud 65.6% similar

function main_v4 62.7% similar

function main 61.5% similar

function create_document 61.0% similar

function create_document_legacy 59.5% similar

function main_v6

Purpose

Source Code

Return Value

Dependencies

Required Imports

Usage Example

Best Practices

Tags

Similar Components

function import_document_from_filecloud 65.6% similar

function main_v4 62.7% similar

function main 61.5% similar

function create_document 61.0% similar

function create_document_legacy 59.5% similar

✨ Improve Code: main_v6

Code Comparison