function main_v6
Main execution function that orchestrates the import of controlled documents from FileCloud into a Neo4j database, checking for duplicates and managing document metadata.
/tf/active/vicechatdev/CDocs/FC_sync.py
483 - 565
complex
Purpose
This function serves as the entry point for a document import script that searches FileCloud for controlled documents, validates them against existing records in Neo4j (by UID or document number), and imports new documents while tracking success/failure statistics. It handles duplicate detection, error logging, and provides a comprehensive summary of the import operation.
Source Code
def main():
    """Import controlled documents from FileCloud into Neo4j.

    Orchestrates the full import run:
      1. Resolve an admin user to own the created documents.
      2. Search FileCloud for candidate controlled documents.
      3. For each document, skip it if it is already managed in Neo4j
         (matched by ``cdoc_uid`` first, then by ``doc_number``),
         otherwise import it.
      4. Log a summary of imported / skipped / failed counts.

    Returns:
        None. All results are reported via logging; per-run statistics
        are tracked in local counters and emitted in the summary.
    """
    try:
        logger.info("Starting FileCloud document import script")

        # An admin user is required to own the imported documents.
        admin_user = get_admin_user()
        if not admin_user:
            logger.error("Cannot proceed without an admin user")
            return

        # Search for controlled documents in FileCloud.
        documents = search_filecloud_for_documents()
        if not documents:
            logger.info("No documents found to import")
            return

        imported_count = 0
        skipped_count = 0
        failed_count = 0

        for doc in documents:
            file_path = doc.get('file_path')
            metadata = doc.get('metadata', {})

            # A document without a file path can never be imported, no
            # matter what metadata it carries.  (Previously this was only
            # checked when a doc_number was present, so a document with a
            # missing path and no doc_number crashed on the "file path"
            # log line below.)
            if not file_path:
                logger.warning(
                    "Skipping document with missing file_path: %s",
                    metadata.get('doc_number') or metadata.get('cdoc_uid') or '<unknown>')
                skipped_count += 1
                continue

            # Preferred duplicate check: cdoc_uid recorded in metadata.
            cdoc_uid = metadata.get('cdoc_uid')
            if cdoc_uid:
                logger.info(f"Found document with cdoc_uid: {cdoc_uid}")
                existing_doc = check_document_exists_by_uid(cdoc_uid)
                if existing_doc:
                    logger.info(f"Document with UID {cdoc_uid} is already managed in Neo4j - skipping")
                    skipped_count += 1
                    continue
                # A cdoc_uid that is absent from Neo4j means the document
                # was meant to be managed but is not yet - import it.
                logger.info(f"Document with UID {cdoc_uid} not found in Neo4j but has cdoc_uid - will import")

            # Fallback duplicate check: by document number, only when no
            # cdoc_uid took precedence above.
            doc_number = metadata.get('doc_number')
            if doc_number and not cdoc_uid:
                existing_doc = check_document_exists_by_doc_number(doc_number)
                if existing_doc:
                    logger.info(f"Document {doc_number} already exists in database by number - skipping")
                    skipped_count += 1
                    continue

            # Import the document; lazy %s args avoid building the log
            # strings unless the INFO level is enabled.
            logger.info("file path: %s", file_path)
            logger.info("metadata: %s", metadata)
            logger.info("admin_user: %s", admin_user.name)
            result = import_document_from_filecloud(file_path, metadata, admin_user)
            if result and result.get('success', False):
                imported_count += 1
                logger.info(f"Successfully imported document: {result.get('doc_number')}")
            else:
                failed_count += 1
                error_msg = result.get('message') if result else "Unknown error"
                logger.error(f"Failed to import document: {error_msg}")

        # Report summary of the whole run.
        logger.info("===== Import Summary =====")
        logger.info(f"Total documents found in FileCloud: {len(documents)}")
        logger.info(f"Documents imported: {imported_count}")
        logger.info(f"Documents skipped (already exist): {skipped_count}")
        logger.info(f"Documents failed to import: {failed_count}")
        logger.info("=========================")
    except Exception as e:
        # logger.exception records the message plus the full traceback,
        # replacing the manual traceback.format_exc() dance.
        logger.exception(f"Error in main execution: {e}")
Return Value
Returns None. The function performs side effects (importing documents, logging results) rather than returning a value. Success/failure information is logged and tracked internally through counters (imported_count, skipped_count, failed_count).
Dependencies
os, sys, logging, tempfile, uuid, io, typing, datetime, CDocs.db.db_operations, CDocs.models.document, CDocs.models.user_extensions, CDocs.controllers.filecloud_controller, CDocs.controllers.document_controller, CDocs.config, FC_api, metadata_catalog, traceback
Required Imports
import os
import sys
import logging
import tempfile
import uuid
import io
from typing import Dict, List, Any, Optional
from datetime import datetime
from CDocs.db import db_operations as db
from CDocs.models.document import ControlledDocument, DocumentVersion
from CDocs.models.user_extensions import DocUser
from CDocs.controllers.filecloud_controller import get_filecloud_client, upload_document_to_filecloud, get_filecloud_document_path, ensure_document_folders, FileCloudError
from CDocs.controllers.document_controller import create_document_version, set_current_version
from CDocs.config import settings
from FC_api import FileCloudAPI
from metadata_catalog import MetadataCatalog
import traceback
Usage Example
# Configure a logger before running the import so every activity is captured.
import logging

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
handler.setFormatter(
    logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
logger.addHandler(handler)

# The following helper functions must be defined elsewhere in the module:
#   get_admin_user()
#   search_filecloud_for_documents()
#   check_document_exists_by_uid(cdoc_uid)
#   check_document_exists_by_doc_number(doc_number)
#   import_document_from_filecloud(file_path, metadata, admin_user)

# Execute the main import process.
if __name__ == '__main__':
    main()

# Logged output will show:
#   - documents found in FileCloud
#   - per-document import progress
#   - a final summary with imported / skipped / failed counts
Best Practices
- Ensure proper logging configuration before calling this function to capture all import activities
- Verify that an admin user exists in the system before running the import
- The function performs duplicate checking using both cdoc_uid (preferred) and doc_number (fallback) to prevent duplicate imports
- Documents with cdoc_uid metadata take precedence over doc_number for duplicate detection
- Monitor the import summary logs to track success rates and identify issues
- Handle FileCloud connection errors gracefully - the function will log errors but continue processing remaining documents
- Ensure sufficient disk space and memory for processing large document sets
- Run this function in a controlled environment as it performs database writes and file operations
- Consider implementing rate limiting if importing large numbers of documents to avoid overwhelming FileCloud API
- Review failed_count in the summary to identify and address systematic import issues
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
- function import_document_from_filecloud — 65.6% similar
- function main_v4 — 62.7% similar
- function main — 61.5% similar
- function create_document — 61.0% similar
- function create_document_legacy — 59.5% similar