dry_run_test - Code Extractor

function dry_run_test

Maturity: 45

Performs a dry run test of SharePoint to FileCloud synchronization, analyzing up to a specified number of documents without actually transferring files.

File:
/tf/active/vicechatdev/SPFCsync/dry_run_test.py

Lines:
40 - 265

Complexity:
complex

Purpose

This function simulates a file synchronization operation between SharePoint and FileCloud to preview what actions would be taken (new uploads, updates, or skips) without modifying any files. It connects to both services, retrieves documents from SharePoint, compares them with FileCloud versions, and provides a detailed report of planned actions. This is useful for validating sync logic, troubleshooting path mappings, and understanding what changes would occur before running an actual sync.

Source Code

def dry_run_test(max_documents=100):
    """
    Preview a SharePoint -> FileCloud sync without transferring any files.

    Builds both service clients from ``Config``, collects up to
    ``max_documents`` documents from SharePoint, looks each one up in
    FileCloud and classifies it as "upload new", "update existing", "skip"
    or "error". A per-document trace and a final summary are printed to
    stdout; nothing is uploaded or modified by this function itself.

    Args:
        max_documents: Upper bound on the number of SharePoint documents to
            analyse. Forwarded unchanged as ``max_files`` to
            ``SharePointGraphClient._get_documents_recursive``; it is not
            validated here.

    Returns:
        None. The function returns early (after printing a message) when a
        client cannot be constructed, when document discovery fails, or when
        no documents are found.
    """
    print("🧪 SharePoint to FileCloud Sync - Dry Run Test")
    print("=" * 60)

    # Initialize clients
    config = Config()

    # The clients are built inside the try blocks so that a failure while
    # constructing them is reported as a connection failure instead of
    # escaping as an unhandled exception.
    # NOTE(review): the original comments state that each connection is tested
    # during client initialization -- confirm against the client classes.
    print("🔗 Connecting to SharePoint...")
    try:
        sp_client = SharePointGraphClient(
            site_url=config.SHAREPOINT_SITE_URL,
            client_id=config.AZURE_CLIENT_ID,
            client_secret=config.AZURE_CLIENT_SECRET
        )
    except Exception as e:
        print(f"❌ SharePoint connection failed: {e}")
        return

    print("🔗 Connecting to FileCloud...")
    try:
        fc_client = FileCloudClient(
            server_url=config.FILECLOUD_SERVER_URL,
            username=config.FILECLOUD_USERNAME,
            password=config.FILECLOUD_PASSWORD
        )
    except Exception as e:
        print(f"❌ FileCloud connection failed: {e}")
        return

    # Success lines are printed together, after both clients exist, to keep
    # the console output order of the happy path unchanged.
    print("✅ SharePoint connection successful")
    print("✅ FileCloud connection successful")

    print(f"\n📊 Discovering up to {max_documents} documents from SharePoint...")

    # Get limited documents from SharePoint
    documents = []
    try:
        # Start with root folder and collect documents up to the limit;
        # the client appends into the list passed in.
        sp_client._get_documents_recursive("/", documents, max_files=max_documents)
    except Exception as e:
        print(f"❌ Error retrieving SharePoint documents: {e}")
        return

    if not documents:
        print("❌ No documents found in SharePoint")
        return

    print(f"✅ Found {len(documents)} documents in SharePoint")

    print("\n🔍 Analyzing sync requirements...")
    print(f"📂 FileCloud base path: {config.FILECLOUD_BASE_PATH}")
    print(f"🌐 FileCloud server: {config.FILECLOUD_SERVER_URL}")
    print("-" * 60)

    actions = {
        'upload_new': [],
        'update_existing': [],
        'skip_same': [],
        'errors': []
    }

    for i, doc in enumerate(documents, 1):
        print(f"\n[{i}/{len(documents)}] Analyzing: {doc['name']}")
        print(f"   📁 Path: {doc['folder_path']}")
        print(f"   📏 Size: {format_file_size(doc.get('size'))}")
        print(f"   📅 Modified: {format_datetime(doc.get('modified'))}")

        # Reset per document: otherwise a failure before the path is built
        # would report the path computed for the previous document.
        fc_path = None

        try:
            # Construct FileCloud path - trace this step by step
            print("   🔧 Building FileCloud path:")
            print(f"      SharePoint folder_path: '{doc['folder_path']}'")
            print(f"      SharePoint file name: '{doc['name']}'")

            base_path = config.FILECLOUD_BASE_PATH
            print(f"      FileCloud base path: '{base_path}'")

            fc_path = _build_filecloud_path(base_path, doc['folder_path'], doc['name'])

            print(f"      Final FileCloud path: '{fc_path}'")
            print(f"   🔍 Looking for file in FileCloud at: {fc_path}")

            # Check if file exists in FileCloud
            print(f"   📡 Calling FileCloud get_file_info with path: '{fc_path}'")
            fc_info = fc_client.get_file_info(fc_path)

            if fc_info is None:
                print("   📂 File not found in FileCloud (will be uploaded as new)")
                # Purely informational: report whether the parent folder exists.
                folder_path = '/'.join(fc_path.split('/')[:-1])
                print(f"   🗂️  Checking if folder exists: '{folder_path}'")
                try:
                    folder_info = fc_client.get_file_info(folder_path)
                    if folder_info:
                        print("      ✅ Parent folder exists in FileCloud")
                    else:
                        print("      📁 Parent folder does not exist (will be created)")
                except Exception as e:
                    # Best effort only; a failed folder lookup must not turn
                    # the document itself into an error.
                    print(f"      ❓ Could not check parent folder: {e}")

                actions['upload_new'].append({
                    'document': doc,
                    'fc_path': fc_path,
                    'action': 'Upload as new file'
                })
                print("   ➕ Action: UPLOAD NEW FILE")
                continue

            print("   📄 File found in FileCloud:")
            print(f"      Size: {format_file_size(fc_info.get('size'))}")
            print(f"      Modified: {format_datetime(fc_info.get('lastmodified'))}")
            print(f"      FileCloud info: {fc_info}")

            # File exists - compare modification times to decide on action
            sp_modified = doc.get('modified')
            fc_modified = fc_info.get('lastmodified')

            if not (sp_modified and fc_modified):
                # Can't compare dates - assume update needed
                actions['update_existing'].append({
                    'document': doc,
                    'fc_path': fc_path,
                    'fc_info': fc_info,
                    'action': 'Update existing file (cannot compare dates)'
                })
                print("   🔄 Action: UPDATE EXISTING (cannot compare dates)")
                continue

            try:
                # A trailing 'Z' is rewritten to an explicit offset before
                # parsing with fromisoformat. Comparing a naive with an aware
                # datetime raises TypeError, which is reported below.
                sp_dt = datetime.fromisoformat(sp_modified.replace('Z', '+00:00'))
                fc_dt = datetime.fromisoformat(fc_modified.replace('Z', '+00:00'))
                sp_is_newer = sp_dt > fc_dt
            except Exception as e:
                actions['errors'].append({
                    'document': doc,
                    'fc_path': fc_path,
                    'error': f"Date comparison error: {e}"
                })
                print(f"   ❌ Action: ERROR comparing dates: {e}")
                continue

            if sp_is_newer:
                actions['update_existing'].append({
                    'document': doc,
                    'fc_path': fc_path,
                    'fc_info': fc_info,
                    'action': 'Update existing file (SharePoint newer)'
                })
                print(f"   🔄 Action: UPDATE EXISTING (SP newer: {format_datetime(sp_modified)} vs FC: {format_datetime(fc_modified)})")
            else:
                actions['skip_same'].append({
                    'document': doc,
                    'fc_path': fc_path,
                    'fc_info': fc_info,
                    'action': 'Skip - FileCloud is up to date'
                })
                print(f"   ⏭️  Action: SKIP (FC up to date: {format_datetime(fc_modified)} vs SP: {format_datetime(sp_modified)})")

        except Exception as e:
            # Per-document boundary: record the failure and keep analysing
            # the remaining documents.
            actions['errors'].append({
                'document': doc,
                'fc_path': fc_path if fc_path is not None else 'Unknown',
                'error': str(e)
            })
            print(f"   ❌ Action: ERROR: {e}")

    # Print summary
    print("\n" + "=" * 60)
    print("📋 DRY RUN SUMMARY")
    print("=" * 60)

    print(f"📊 Total documents analyzed: {len(documents)}")
    print(f"➕ New files to upload: {len(actions['upload_new'])}")
    print(f"🔄 Existing files to update: {len(actions['update_existing'])}")
    print(f"⏭️  Files to skip (up to date): {len(actions['skip_same'])}")
    print(f"❌ Errors encountered: {len(actions['errors'])}")

    # Show details for each category (skipped files are only counted above)
    if actions['upload_new']:
        _print_planned_files("➕ NEW FILES TO UPLOAD", actions['upload_new'], 10)

    if actions['update_existing']:
        _print_planned_files("🔄 FILES TO UPDATE", actions['update_existing'], 10)

    if actions['errors']:
        print(f"\n❌ ERRORS ({len(actions['errors'])}):")
        for item in actions['errors'][:5]:  # Show first 5 errors
            print(f"   • {item['document']['name']}: {item['error']}")
        if len(actions['errors']) > 5:
            print(f"   ... and {len(actions['errors']) - 5} more errors")

    print("\n💡 This was a DRY RUN - no files were actually uploaded or modified")
    print("🚀 To perform the actual sync, run: python main.py --once")


def _build_filecloud_path(base_path, folder_path, file_name):
    """Join the FileCloud base path, a SharePoint folder path and a file name.

    The base path gets a trailing slash if it lacks one, and a single leading
    slash is removed from ``folder_path``. An empty or ``'/'`` folder path
    places the file directly under the base path. No other normalisation is
    applied.
    """
    if not base_path.endswith('/'):
        base_path += '/'
    if folder_path and folder_path != '/':
        if folder_path.startswith('/'):
            folder_path = folder_path[1:]  # Remove leading slash
        return base_path + folder_path + '/' + file_name
    # File is in root
    return base_path + file_name


def _print_planned_files(heading, items, limit):
    """Print the first ``limit`` planned actions of one category, then a remainder count."""
    print(f"\n{heading} ({len(items)}):")
    for item in items[:limit]:
        doc = item['document']
        print(f"   • {doc['name']} → {item['fc_path']}")
        print(f"     Size: {format_file_size(doc.get('size'))}")
    if len(items) > limit:
        print(f"   ... and {len(items) - limit} more")

Parameters

Name	Type	Default	Kind
`max_documents`	-	100	positional_or_keyword

Parameter Details

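max_documents: Maximum number of documents to retrieve from SharePoint for analysis. Defaults to 100. The limit keeps the number of API calls bounded and gives a manageable sample size for testing. It is expected to be a positive integer; the function does not validate it and passes it unchanged to `_get_documents_recursive` as `max_files`.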

Return Value

This function does not return any value (implicitly returns None). Instead, it prints comprehensive output to the console including connection status, document analysis details, and a summary report of planned actions categorized as: new uploads, updates, skips, and errors.

Dependencies

datetime
os
sys

Required Imports

import os
import sys
from datetime import datetime
from sharepoint_graph_client import SharePointGraphClient
from filecloud_client import FileCloudClient
from config import Config

Usage Example

# Basic usage with default 100 documents
dry_run_test()

# Test with a smaller sample size
dry_run_test(max_documents=25)

# Test with a larger sample
dry_run_test(max_documents=500)

# Example output interpretation:
# ✅ SharePoint connection successful
# ✅ FileCloud connection successful
# 📊 Total documents analyzed: 100
# ➕ New files to upload: 45
# 🔄 Existing files to update: 30
# ⏭️  Files to skip (up to date): 20
# ❌ Errors encountered: 5

Best Practices

Always run this dry run test before executing actual file synchronization to preview changes
Start with a small max_documents value (e.g., 10-25) when first testing to quickly identify configuration issues
Review the error section carefully to identify path mapping or permission issues before running actual sync
Verify that the FileCloud base path is correctly configured by examining the path construction output
Check that date comparisons are working correctly to avoid unnecessary file updates
Ensure both SharePoint and FileCloud credentials are valid before running
Monitor the console output for path construction details to troubleshoot folder structure issues
Use this function to validate that the _get_documents_recursive method respects the max_files limit
Pay attention to files that would be updated vs skipped to understand sync behavior
If many errors occur, reduce max_documents to isolate problematic files more easily

Similar Components

AI-powered semantic similarity - components with related functionality:

function main_v17 71.9% similar

Orchestrates and executes a comprehensive test suite for the SharePoint to FileCloud synchronization service, running configuration, connection, and operation tests.
From: /tf/active/vicechatdev/SPFCsync/test_connections.py
function test_filecloud_integration 69.6% similar

Integration test function that verifies that the SharePoint Graph API client works correctly with the FileCloud synchronization service by creating a sync service instance and testing document retrieval.
From: /tf/active/vicechatdev/SPFCsync/test_graph_client.py
function main_v16 68.3% similar

Executes a diagnostic analysis for file synchronization issues, analyzes missing files, and saves the results to a JSON file.
From: /tf/active/vicechatdev/SPFCsync/deep_diagnostics.py
class SyncDiagnostics 67.7% similar

A diagnostic class that analyzes and reports on synchronization issues between SharePoint and FileCloud, identifying missing files and root causes of sync failures.
From: /tf/active/vicechatdev/SPFCsync/deep_diagnostics.py
function main_v37 66.5% similar

Main test function that validates SharePoint Graph API integration, tests the Graph client connection, and verifies FileCloud sync functionality.
From: /tf/active/vicechatdev/SPFCsync/test_graph_client.py

← Back to Browse

Assistant

Hi! I can help improve this code. Tell me what you'd like to enhance (e.g., "add error handling", "optimize performance", "improve readability", "add type hints").

Code Comparison

Original Code

                            def dry_run_test(max_documents=100):
    """
    Preview a SharePoint -> FileCloud sync for a limited number of documents.

    Builds both service clients from Config, collects up to max_documents
    documents from SharePoint, looks each one up in FileCloud and classifies
    it as "upload new", "update existing", "skip" or "error". A per-document
    trace and a summary are printed; the function returns None.

    max_documents is forwarded as max_files to
    SharePointGraphClient._get_documents_recursive and is not validated here.
    """
    print("🧪 SharePoint to FileCloud Sync - Dry Run Test")
    print("=" * 60)
    
    # Initialize clients
    config = Config()
    
    print("🔗 Connecting to SharePoint...")
    sp_client = SharePointGraphClient(
        site_url=config.SHAREPOINT_SITE_URL,
        client_id=config.AZURE_CLIENT_ID,
        client_secret=config.AZURE_CLIENT_SECRET
    )
    
    print("🔗 Connecting to FileCloud...")
    fc_client = FileCloudClient(
        server_url=config.FILECLOUD_SERVER_URL,
        username=config.FILECLOUD_USERNAME,
        password=config.FILECLOUD_PASSWORD
    )
    
    # Test connections
    # NOTE(review): both try blocks below wrap only a print call, so their
    # except branches are effectively unreachable; an exception raised by the
    # constructors above propagates to the caller instead.
    try:
        # SharePoint connection is tested during initialization
        print("✅ SharePoint connection successful")
    except Exception as e:
        print(f"❌ SharePoint connection failed: {e}")
        return
    
    try:
        # FileCloud connection is tested during initialization
        print("✅ FileCloud connection successful")
    except Exception as e:
        print(f"❌ FileCloud connection failed: {e}")
        return
    
    print(f"\n📊 Discovering up to {max_documents} documents from SharePoint...")
    
    # Get limited documents from SharePoint
    documents = []
    try:
        # Start with root folder and collect documents up to the limit;
        # the list is passed in and inspected afterwards, so the client is
        # expected to append into it.
        sp_client._get_documents_recursive("/", documents, max_files=max_documents)
        
        if not documents:
            print("❌ No documents found in SharePoint")
            return
            
        print(f"✅ Found {len(documents)} documents in SharePoint")
        
    except Exception as e:
        print(f"❌ Error retrieving SharePoint documents: {e}")
        return
    
    print(f"\n🔍 Analyzing sync requirements...")
    print(f"📂 FileCloud base path: {config.FILECLOUD_BASE_PATH}")
    print(f"🌐 FileCloud server: {config.FILECLOUD_SERVER_URL}")
    print("-" * 60)
    
    # Planned actions, grouped by category; each entry is a dict holding the
    # SharePoint document, the computed FileCloud path and a description.
    actions = {
        'upload_new': [],
        'update_existing': [],
        'skip_same': [],
        'errors': []
    }
    
    for i, doc in enumerate(documents, 1):
        # These header prints sit outside the try below, so a document missing
        # 'name' or 'folder_path' raises KeyError out of the whole function.
        print(f"\n[{i}/{len(documents)}] Analyzing: {doc['name']}")
        print(f"   📁 Path: {doc['folder_path']}")
        print(f"   📏 Size: {format_file_size(doc.get('size'))}")
        print(f"   📅 Modified: {format_datetime(doc.get('modified'))}")
        
        try:
            # Construct FileCloud path - let's trace this step by step
            print(f"   🔧 Building FileCloud path:")
            print(f"      SharePoint folder_path: '{doc['folder_path']}'")
            print(f"      SharePoint file name: '{doc['name']}'")
            
            # Start with the configured base path
            base_path = config.FILECLOUD_BASE_PATH
            print(f"      FileCloud base path: '{base_path}'")
            
            # Add the SharePoint folder structure
            sp_folder = doc['folder_path']
            if sp_folder and sp_folder != '/':
                if not base_path.endswith('/'):
                    base_path += '/'
                if sp_folder.startswith('/'):
                    sp_folder = sp_folder[1:]  # Remove leading slash
                # Only the leading slash is stripped; a trailing slash on
                # sp_folder would yield a double slash in the result.
                fc_path = base_path + sp_folder + '/' + doc['name']
            else:
                # File is in root
                if not base_path.endswith('/'):
                    base_path += '/'
                fc_path = base_path + doc['name']
            
            print(f"      Final FileCloud path: '{fc_path}'")
            print(f"   🔍 Looking for file in FileCloud at: {fc_path}")
            
            # Check if file exists in FileCloud
            print(f"   📡 Calling FileCloud get_file_info with path: '{fc_path}'")
            fc_info = fc_client.get_file_info(fc_path)
            
            # A None result is treated as "file does not exist in FileCloud".
            if fc_info is None:
                print(f"   📂 File not found in FileCloud (will be uploaded as new)")
                # Let's also try to check if the folder exists
                folder_path = '/'.join(fc_path.split('/')[:-1])
                # NOTE(review): the leading character of the next message is a
                # mis-encoded emoji (U+FFFD replacement character).
                print(f"   �️  Checking if folder exists: '{folder_path}'")
                try:
                    folder_info = fc_client.get_file_info(folder_path)
                    if folder_info:
                        print(f"      ✅ Parent folder exists in FileCloud")
                    else:
                        print(f"      📁 Parent folder does not exist (will be created)")
                except Exception as e:
                    # Informational lookup only; failure here does not affect
                    # the action chosen for the document.
                    print(f"      ❓ Could not check parent folder: {e}")
            else:
                # NOTE(review): this message also starts with a stray U+FFFD.
                print(f"   �📄 File found in FileCloud:")
                print(f"      Size: {format_file_size(fc_info.get('size'))}")
                print(f"      Modified: {format_datetime(fc_info.get('lastmodified'))}")
                print(f"      FileCloud info: {fc_info}")
            
            # Same condition as above, tested a second time to pick the action.
            if fc_info is None:
                # File doesn't exist - would upload as new
                actions['upload_new'].append({
                    'document': doc,
                    'fc_path': fc_path,
                    'action': 'Upload as new file'
                })
                print(f"   ➕ Action: UPLOAD NEW FILE")
                
            else:
                # File exists - compare modification times to decide on action
                sp_modified = doc.get('modified')
                fc_modified = fc_info.get('lastmodified')
                
                if sp_modified and fc_modified:
                    try:
                        # A trailing 'Z' is rewritten to '+00:00' before
                        # parsing with fromisoformat. Any failure to parse or
                        # compare is recorded as an error for this document.
                        sp_dt = datetime.fromisoformat(sp_modified.replace('Z', '+00:00'))
                        fc_dt = datetime.fromisoformat(fc_modified.replace('Z', '+00:00'))
                        
                        if sp_dt > fc_dt:
                            actions['update_existing'].append({
                                'document': doc,
                                'fc_path': fc_path,
                                'fc_info': fc_info,
                                'action': 'Update existing file (SharePoint newer)'
                            })
                            print(f"   🔄 Action: UPDATE EXISTING (SP newer: {format_datetime(sp_modified)} vs FC: {format_datetime(fc_modified)})")
                        else:
                            # Equal or older SharePoint timestamp: skip.
                            actions['skip_same'].append({
                                'document': doc,
                                'fc_path': fc_path,
                                'fc_info': fc_info,
                                'action': 'Skip - FileCloud is up to date'
                            })
                            print(f"   ⏭️  Action: SKIP (FC up to date: {format_datetime(fc_modified)} vs SP: {format_datetime(sp_modified)})")
                    except Exception as e:
                        actions['errors'].append({
                            'document': doc,
                            'fc_path': fc_path,
                            'error': f"Date comparison error: {e}"
                        })
                        print(f"   ❌ Action: ERROR comparing dates: {e}")
                else:
                    # Can't compare dates - assume update needed
                    actions['update_existing'].append({
                        'document': doc,
                        'fc_path': fc_path,
                        'fc_info': fc_info,
                        'action': 'Update existing file (cannot compare dates)'
                    })
                    print(f"   🔄 Action: UPDATE EXISTING (cannot compare dates)")
                    
        except Exception as e:
            # Per-document boundary: record the failure and continue.
            # NOTE(review): fc_path is never reset between iterations, so the
            # locals() check can report the previous document's path when this
            # iteration failed before computing its own.
            actions['errors'].append({
                'document': doc,
                'fc_path': fc_path if 'fc_path' in locals() else 'Unknown',
                'error': str(e)
            })
            print(f"   ❌ Action: ERROR: {e}")
    
    # Print summary
    print("\n" + "=" * 60)
    print("📋 DRY RUN SUMMARY")
    print("=" * 60)
    
    print(f"📊 Total documents analyzed: {len(documents)}")
    print(f"➕ New files to upload: {len(actions['upload_new'])}")
    print(f"🔄 Existing files to update: {len(actions['update_existing'])}")
    print(f"⏭️  Files to skip (up to date): {len(actions['skip_same'])}")
    print(f"❌ Errors encountered: {len(actions['errors'])}")
    
    # Show details for each category (skipped files are only counted above)
    if actions['upload_new']:
        print(f"\n➕ NEW FILES TO UPLOAD ({len(actions['upload_new'])}):")
        for item in actions['upload_new'][:10]:  # Show first 10
            doc = item['document']
            print(f"   • {doc['name']} → {item['fc_path']}")
            print(f"     Size: {format_file_size(doc.get('size'))}")
        if len(actions['upload_new']) > 10:
            print(f"   ... and {len(actions['upload_new']) - 10} more")
    
    if actions['update_existing']:
        print(f"\n🔄 FILES TO UPDATE ({len(actions['update_existing'])}):")
        for item in actions['update_existing'][:10]:  # Show first 10
            doc = item['document']
            print(f"   • {doc['name']} → {item['fc_path']}")
            print(f"     Size: {format_file_size(doc.get('size'))}")
        if len(actions['update_existing']) > 10:
            print(f"   ... and {len(actions['update_existing']) - 10} more")
    
    if actions['errors']:
        print(f"\n❌ ERRORS ({len(actions['errors'])}):")
        for item in actions['errors'][:5]:  # Show first 5 errors
            doc = item['document']
            print(f"   • {doc['name']}: {item['error']}")
        if len(actions['errors']) > 5:
            print(f"   ... and {len(actions['errors']) - 5} more errors")
    
    print(f"\n💡 This was a DRY RUN - no files were actually uploaded or modified")
    print(f"🚀 To perform the actual sync, run: python main.py --once")
                        

Improved Code

🔍 Code Extractor

function dry_run_test

Purpose

Source Code

Parameters

Parameter Details

Return Value

Dependencies

Required Imports

Usage Example

Best Practices

Tags

Similar Components

function main_v17 71.9% similar

function test_filecloud_integration 69.6% similar

function main_v16 68.3% similar

class SyncDiagnostics 67.7% similar

function main_v37 66.5% similar

function dry_run_test

Purpose

Source Code

Parameters

Parameter Details

Return Value

Dependencies

Required Imports

Usage Example

Best Practices

Tags

Similar Components

function main_v17 71.9% similar

function test_filecloud_integration 69.6% similar

function main_v16 68.3% similar

class SyncDiagnostics 67.7% similar

function main_v37 66.5% similar

✨ Improve Code: dry_run_test

Code Comparison