class LegacySystemMigrator
Migrates data from a legacy file-based document system to a new database-backed architecture with TextSection-centric design, including document metadata, sections, chat configurations, and version history.
/tf/active/vicechatdev/vice_ai/migration.py
20 - 373
complex
Purpose
The LegacySystemMigrator class orchestrates the complete migration process from legacy JSON-based document files to a modern database system. It handles discovery of legacy documents, transformation of document structures, migration of sections with their content and metadata, preservation of chat configurations and history, and provides verification capabilities. The class also includes functionality to create sample data for testing the new architecture. It serves as a one-time migration tool or can be used to bootstrap new environments with test data.
Source Code
class LegacySystemMigrator:
    """Migrate data from the legacy file-based system to the new database.

    Legacy documents are JSON files found under ``legacy_data_dir``. Each file
    is turned into one document via ``DocumentService`` and each of its
    sections into a text section via ``TextSectionService``; chat
    configuration, chat history, tags and metadata are carried over when the
    legacy section provides them.
    """

    def __init__(self, legacy_data_dir: str, db_path: str = "documents.db"):
        """Store the legacy directory and build the database/service objects.

        Args:
            legacy_data_dir: Directory walked recursively for legacy ``.json``
                document files.
            db_path: Path handed to ``DatabaseManager``.
        """
        self.legacy_data_dir = legacy_data_dir
        self.db_manager = DatabaseManager(db_path)
        self.text_section_service = TextSectionService(self.db_manager)
        self.document_service = DocumentService(self.db_manager, self.text_section_service)

    def run_migration(self) -> Dict[str, Any]:
        """Run the complete migration process.

        Returns:
            A report dict with ``start_time``, ``end_time``,
            ``documents_migrated``, ``text_sections_created``, ``errors``,
            ``warnings`` and ``success``. ``fatal_error`` is added when the
            run aborts outside the per-document error handling.
        """
        print("Starting migration from legacy system to TextSection-centric architecture...")

        # Initialize database
        self.db_manager.init_database()

        migration_report: Dict[str, Any] = {
            'start_time': datetime.now().isoformat(),
            'documents_migrated': 0,
            'text_sections_created': 0,
            'errors': [],
            'warnings': []
        }

        try:
            # os.walk() silently yields nothing for a missing directory, which
            # would otherwise look like a successful migration of zero files.
            if not os.path.isdir(self.legacy_data_dir):
                warning_msg = f"Warning: Legacy data directory not found: {self.legacy_data_dir}"
                print(warning_msg)
                migration_report['warnings'].append(warning_msg)

            # Find all legacy document files
            legacy_documents = self._discover_legacy_documents()
            print(f"Found {len(legacy_documents)} legacy documents")

            for doc_file in legacy_documents:
                try:
                    self._migrate_document(doc_file, migration_report)
                except Exception as e:
                    # One broken document must not stop the remaining ones.
                    error_msg = f"Error migrating {doc_file}: {str(e)}"
                    print(error_msg)
                    migration_report['errors'].append(error_msg)

            migration_report['end_time'] = datetime.now().isoformat()
            migration_report['success'] = len(migration_report['errors']) == 0

            print(f"Migration completed. {migration_report['documents_migrated']} documents, "
                  f"{migration_report['text_sections_created']} text sections created")

            if migration_report['errors']:
                print(f"Encountered {len(migration_report['errors'])} errors")

            return migration_report

        except Exception as e:
            migration_report['fatal_error'] = str(e)
            migration_report['success'] = False
            # Keep the report shape uniform: end_time is set on every path.
            migration_report['end_time'] = datetime.now().isoformat()
            print(f"Fatal error during migration: {e}")
            return migration_report

    def _discover_legacy_documents(self) -> List[str]:
        """Find all legacy document files in the data directory.

        Returns:
            Sorted paths of every file below ``legacy_data_dir`` whose name
            ends with ``.json`` and does not start with ``.``. The list is
            empty when the directory does not exist.
        """
        document_files = [
            os.path.join(root, file_name)
            for root, _dirs, files in os.walk(self.legacy_data_dir)
            for file_name in files
            if file_name.endswith('.json') and not file_name.startswith('.')
        ]
        # os.walk() order depends on the filesystem; sort so repeated runs
        # process documents in the same order.
        document_files.sort()
        return document_files

    def _migrate_document(self, doc_file: str, report: Dict[str, Any]):
        """Migrate a single legacy document.

        Args:
            doc_file: Path of the legacy JSON file.
            report: Migration report; ``documents_migrated`` is incremented
                and section failures are appended to ``warnings``.

        Raises:
            Exception: If the file cannot be read or parsed, or its top-level
                JSON value is not an object.
        """
        try:
            with open(doc_file, 'r', encoding='utf-8') as f:
                legacy_data = json.load(f)
        except Exception as e:
            raise Exception(f"Failed to read document file: {e}") from e

        if not isinstance(legacy_data, dict):
            raise Exception(
                f"Unexpected document format: expected a JSON object, "
                f"got {type(legacy_data).__name__}"
            )

        # Extract document metadata; the file name without its extension is
        # the fallback title.
        default_title = os.path.splitext(os.path.basename(doc_file))[0]
        doc_title = legacy_data.get('title', default_title)
        doc_owner = legacy_data.get('owner', 'migrated_user')
        doc_description = legacy_data.get('description', 'Migrated from legacy system')

        # Create new document
        document = self.document_service.create_document(
            owner=doc_owner,
            title=doc_title,
            description=doc_description
        )

        # Migrate sections; a null "sections" value is treated as no sections.
        sections = legacy_data.get('sections') or []
        if isinstance(sections, dict):  # Handle different legacy formats
            sections = list(sections.values())

        for i, section_data in enumerate(sections):
            try:
                text_section = self._migrate_section(section_data, doc_owner, report)
                if text_section:
                    # Add to document at the section's original index
                    self.document_service.add_text_section_to_document(
                        document.id,
                        text_section.id,
                        position=i
                    )
            except Exception as e:
                # A failed section is reported but does not abort the document.
                warning_msg = f"Warning: Failed to migrate section in {doc_file}: {e}"
                print(warning_msg)
                report['warnings'].append(warning_msg)

        report['documents_migrated'] += 1
        print(f"Migrated document: {doc_title}")

    def _migrate_section(self, section_data: Dict, owner: str, report: Dict[str, Any]) -> "Optional[TextSection]":
        """Migrate a single section to a TextSection.

        Args:
            section_data: Legacy section dict. Alternative legacy keys are
                accepted as fallbacks: ``name`` for ``title``, ``text`` for
                ``content``, ``heading_level`` for ``level``, ``ai_config``
                for ``chat_config`` and ``ai_history`` for ``chat_history``.
            owner: Owner passed to the created text section.
            report: Migration report; ``text_sections_created`` is incremented
                on success.

        Returns:
            The created text section.

        Raises:
            Exception: If ``section_data`` is not a dict or any migration step
                fails.
        """
        if not isinstance(section_data, dict):
            raise Exception(
                f"Failed to migrate section 'unknown': expected a dict, "
                f"got {type(section_data).__name__}"
            )

        try:
            # Extract section data with fallbacks for different legacy formats
            title = section_data.get('title', section_data.get('name', 'Untitled Section'))
            content = section_data.get('content', section_data.get('text', ''))
            level = section_data.get('level', section_data.get('heading_level', 1))

            # Determine section type
            section_type = self._determine_section_type(section_data)

            # Create text section
            text_section = self.text_section_service.create_text_section(
                owner=owner,
                title=title,
                section_type=section_type,
                level=level,
                initial_content=content
            )

            # Migrate chat configuration if present
            chat_config = section_data.get('chat_config', section_data.get('ai_config'))
            if chat_config:
                self._migrate_chat_configuration(text_section.id, chat_config)

            # Migrate chat history if present
            chat_history = section_data.get('chat_history', section_data.get('ai_history', []))
            if chat_history:
                self._migrate_chat_history(text_section.id, chat_history)

            # Migrate tags and metadata with a single save when either is set
            tags = section_data.get('tags', [])
            metadata = section_data.get('metadata', {})
            if tags:
                text_section.tags = tags
            if metadata:
                text_section.metadata = metadata
            if tags or metadata:
                self.text_section_service.db.save_text_section(text_section)

            report['text_sections_created'] += 1
            return text_section

        except Exception as e:
            raise Exception(
                f"Failed to migrate section '{section_data.get('title', 'unknown')}': {e}"
            ) from e

    def _determine_section_type(self, section_data: Dict) -> "SectionType":
        """Determine the section type from legacy data.

        The ``type`` key is consulted first, then ``section_type``. The first
        of them holding a string decides the result; matching ignores case and
        surrounding whitespace. Missing, null, non-string or unrecognized
        values map to ``SectionType.TEXT``.
        """
        type_mapping = {
            'text': SectionType.TEXT,
            'heading': SectionType.HEADING,
            'list': SectionType.LIST,
            'table': SectionType.TABLE,
            'code': SectionType.CODE,
            'quote': SectionType.QUOTE,
            'image': SectionType.IMAGE,
            'reference': SectionType.REFERENCE,
            'generated': SectionType.GENERATED,
            'outline': SectionType.OUTLINE
        }

        for key in ('type', 'section_type'):
            raw_type = section_data.get(key)
            # A null or non-string value must not crash the section migration.
            if isinstance(raw_type, str):
                return type_mapping.get(raw_type.strip().lower(), SectionType.TEXT)
        return SectionType.TEXT

    def _migrate_chat_configuration(self, section_id: str, chat_config: Dict):
        """Migrate the chat configuration of one section.

        Best effort: any failure is printed as a warning and the section
        migration continues without a chat configuration.

        Args:
            section_id: ID of the text section to update.
            chat_config: Legacy configuration dict; missing keys fall back to
                the defaults spelled out below.
        """
        try:
            # NOTE(review): bool() treats any non-empty string, including
            # "false", as True - confirm legacy files store real booleans.
            config = ChatConfiguration(
                system_prompt=chat_config.get('system_prompt', ''),
                model=chat_config.get('model', 'gpt-3.5-turbo'),
                temperature=float(chat_config.get('temperature', 0.7)),
                max_tokens=int(chat_config.get('max_tokens', 1000)),
                context_window=int(chat_config.get('context_window', 10)),
                auto_save=bool(chat_config.get('auto_save', True)),
                reference_mode=chat_config.get('reference_mode', 'citations')
            )

            self.text_section_service.update_chat_configuration(section_id, config)

        except Exception as e:
            print(f"Warning: Failed to migrate chat configuration: {e}")

    def _migrate_chat_history(self, section_id: str, chat_history: List[Dict]):
        """Migrate the chat message history of one section.

        Best effort: each message is handled on its own, so a malformed entry
        is reported and skipped without dropping the messages after it.
        Messages with empty content are not migrated.

        Args:
            section_id: ID of the text section receiving the messages.
            chat_history: List of legacy message dicts with ``role``,
                ``content`` and optional ``references``.
        """
        if not isinstance(chat_history, (list, tuple)):
            print(
                f"Warning: Failed to migrate chat history: expected a list, "
                f"got {type(chat_history).__name__}"
            )
            return

        for message_data in chat_history:
            try:
                role = message_data.get('role', 'user')
                content = message_data.get('content', '')
                references = message_data.get('references', [])

                if content:  # Only migrate non-empty messages
                    self.text_section_service.add_chat_message(
                        section_id=section_id,
                        role=role,
                        content=content,
                        references=references
                    )
            except Exception as e:
                print(f"Warning: Failed to migrate chat history: {e}")

    def create_sample_data(self) -> Dict[str, Any]:
        """Create sample data for testing the new architecture.

        For each of the users ``alice`` and ``bob`` this creates three text
        sections, one document containing them, and a chat configuration plus
        two chat messages on the "Main Content" section.

        Returns:
            Dict with ``users`` (user names), ``documents`` (document IDs) and
            ``text_sections`` (text section IDs).
        """
        print("Creating sample data...")

        # Initialize database
        self.db_manager.init_database()

        sample_data: Dict[str, Any] = {
            'users': ['alice', 'bob'],
            'documents': [],
            'text_sections': []
        }

        for user in sample_data['users']:
            # Create sample text sections
            intro_section = self.text_section_service.create_text_section(
                owner=user,
                title="Introduction",
                section_type=SectionType.HEADING,
                level=1,
                initial_content=f"# Introduction\n\nThis is a sample document for {user}."
            )

            content_section = self.text_section_service.create_text_section(
                owner=user,
                title="Main Content",
                section_type=SectionType.TEXT,
                level=2,
                initial_content="This section contains the main content of the document. It demonstrates the TextSection-centric architecture with versioning and chat capabilities."
            )

            conclusion_section = self.text_section_service.create_text_section(
                owner=user,
                title="Conclusion",
                section_type=SectionType.TEXT,
                level=2,
                initial_content="In conclusion, this sample demonstrates the new architecture's capabilities."
            )

            sample_data['text_sections'].extend([
                intro_section.id, content_section.id, conclusion_section.id
            ])

            # Create sample document
            document = self.document_service.create_document(
                owner=user,
                title=f"{user.title()}'s Sample Document",
                description="A sample document demonstrating the new TextSection-centric architecture"
            )

            # Add sections to document
            self.document_service.add_text_section_to_document(document.id, intro_section.id, 0)
            self.document_service.add_text_section_to_document(document.id, content_section.id, 1)
            self.document_service.add_text_section_to_document(document.id, conclusion_section.id, 2)

            sample_data['documents'].append(document.id)

            # Add sample chat configuration and messages
            chat_config = ChatConfiguration(
                system_prompt="You are a helpful document writing assistant.",
                model="gpt-3.5-turbo",
                temperature=0.7,
                max_tokens=1000
            )

            self.text_section_service.update_chat_configuration(content_section.id, chat_config)

            # Add sample chat messages
            self.text_section_service.add_chat_message(
                content_section.id,
                "user",
                "Can you help me expand this section?"
            )

            self.text_section_service.add_chat_message(
                content_section.id,
                "assistant",
                "I'd be happy to help expand this section. What specific aspects would you like me to focus on?",
                [{"type": "suggestion", "content": "Consider adding examples or case studies"}]
            )

        print(f"Created sample data: {len(sample_data['documents'])} documents, {len(sample_data['text_sections'])} text sections")
        return sample_data

    def verify_migration(self) -> Dict[str, Any]:
        """Verify the migration by counting what is stored in the database.

        Returns:
            Dict with ``database_tables_exist``, ``users_found`` (sorted owner
            names), ``documents_count``, ``text_sections_count``,
            ``versions_count``, ``chat_configurations_found``,
            ``chat_messages_count`` and ``issues``. Any exception raised while
            verifying is recorded in ``issues`` instead of propagating.
        """
        print("Verifying migration...")

        verification_report: Dict[str, Any] = {
            'database_tables_exist': False,
            'users_found': [],
            'documents_count': 0,
            'text_sections_count': 0,
            'versions_count': 0,
            'chat_configurations_found': 0,
            'chat_messages_count': 0,
            'issues': []
        }

        try:
            # Check if database tables exist
            verification_report['database_tables_exist'] = self.db_manager.verify_tables()

            documents = self.db_manager.get_all_documents()
            text_sections = self.db_manager.get_all_text_sections()

            # Users are the owners of any document or text section.
            users = {doc.owner for doc in documents}
            users.update(section.owner for section in text_sections)

            # key=str keeps the sort working even if an owner is not a string.
            verification_report['users_found'] = sorted(users, key=str)
            verification_report['documents_count'] = len(documents)
            verification_report['text_sections_count'] = len(text_sections)

            # Count versions and chat data
            total_versions = 0
            chat_configs = 0
            total_messages = 0

            for section in text_sections:
                versions = self.text_section_service.get_text_section_versions(section.id)
                total_versions += len(versions)

                # Only configurations with a non-empty system prompt are counted.
                if section.chat_configuration and section.chat_configuration.system_prompt:
                    chat_configs += 1

                total_messages += len(section.chat_messages)

            verification_report['versions_count'] = total_versions
            verification_report['chat_configurations_found'] = chat_configs
            verification_report['chat_messages_count'] = total_messages

            print(f"Verification complete: {verification_report['text_sections_count']} text sections, "
                  f"{verification_report['documents_count']} documents, "
                  f"{verification_report['versions_count']} versions")

        except Exception as e:
            verification_report['issues'].append(f"Verification error: {e}")

        return verification_report
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| bases | - | - | |
Parameter Details
legacy_data_dir: Path to the directory containing legacy JSON document files. The migrator will recursively search this directory for all .json files (excluding hidden files starting with '.') to migrate.
db_path: Path to the SQLite database file where migrated data will be stored. Defaults to 'documents.db'. If the file doesn't exist, it will be created during migration.
Return Value
The constructor returns a LegacySystemMigrator instance. Key methods return: run_migration() returns a Dict with migration statistics including 'start_time', 'end_time', 'documents_migrated', 'text_sections_created', 'errors', 'warnings', and 'success' boolean; create_sample_data() returns a Dict with 'users', 'documents', and 'text_sections' lists containing IDs; verify_migration() returns a Dict with verification results including table existence, counts, and any issues found.
Class Interface
Methods
__init__(self, legacy_data_dir: str, db_path: str = 'documents.db')
Purpose: Initialize the migrator with paths to legacy data and target database, creating necessary service instances
Parameters:
legacy_data_dir: Directory path containing legacy JSON document files
db_path: Path to SQLite database file (created if it doesn't exist)
Returns: LegacySystemMigrator instance
run_migration(self) -> Dict[str, Any]
Purpose: Execute the complete migration process from legacy files to database, including discovery, transformation, and error handling
Returns: Dictionary containing migration report with keys: 'start_time', 'end_time', 'documents_migrated', 'text_sections_created', 'errors' (list), 'warnings' (list), 'success' (bool), and optionally 'fatal_error'
_discover_legacy_documents(self) -> List[str]
Purpose: Recursively search the legacy data directory for all JSON document files
Returns: List of full file paths to legacy JSON documents (excludes hidden files starting with '.')
_migrate_document(self, doc_file: str, report: Dict[str, Any])
Purpose: Migrate a single legacy document file, creating a new Document and migrating all its sections
Parameters:
doc_file: Full path to the legacy JSON document file
report: Migration report dictionary to update with progress and errors
Returns: None (updates report dictionary in-place)
_migrate_section(self, section_data: Dict, owner: str, report: Dict[str, Any]) -> Optional[TextSection]
Purpose: Transform and migrate a single section from legacy format to TextSection, including content, chat config, and metadata
Parameters:
section_data: Dictionary containing legacy section data with fields like 'title', 'content', 'type', 'level', 'chat_config', 'chat_history', 'tags', 'metadata'
owner: Username of the section owner
report: Migration report dictionary to update with section count
Returns: TextSection object if successful. If any step fails, the method raises an Exception describing the failed section; it never returns None.
_determine_section_type(self, section_data: Dict) -> SectionType
Purpose: Map legacy section type strings to SectionType enum values
Parameters:
section_data: Dictionary containing 'type' or 'section_type' field
Returns: SectionType enum value (defaults to SectionType.TEXT if type not recognized)
_migrate_chat_configuration(self, section_id: str, chat_config: Dict)
Purpose: Migrate chat/AI configuration settings for a section
Parameters:
section_id: ID of the TextSection to update
chat_config: Dictionary with chat configuration fields: 'system_prompt', 'model', 'temperature', 'max_tokens', 'context_window', 'auto_save', 'reference_mode'
Returns: None (prints warning if migration fails)
_migrate_chat_history(self, section_id: str, chat_history: List[Dict])
Purpose: Migrate chat message history for a section
Parameters:
section_id: ID of the TextSection to add messages to
chat_history: List of message dictionaries with 'role', 'content', and optional 'references' fields
Returns: None (prints warning if migration fails, skips empty messages)
create_sample_data(self) -> Dict[str, Any]
Purpose: Generate sample documents and text sections for testing the new architecture
Returns: Dictionary with keys 'users' (list of usernames), 'documents' (list of document IDs), 'text_sections' (list of section IDs)
verify_migration(self) -> Dict[str, Any]
Purpose: Verify migration success by checking database integrity and counting migrated entities
Returns: Dictionary with verification results: 'database_tables_exist', 'users_found', 'documents_count', 'text_sections_count', 'versions_count', 'chat_configurations_found', 'chat_messages_count', 'issues' (list of errors)
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
| legacy_data_dir | str | Path to directory containing legacy JSON document files to migrate | instance |
| db_manager | DatabaseManager | Database manager instance for low-level database operations and initialization | instance |
| text_section_service | TextSectionService | Service for creating and managing TextSection entities, versions, and chat functionality | instance |
| document_service | DocumentService | Service for creating and managing Document entities and their section associations | instance |
Dependencies
os, json, uuid, datetime, typing, models, services, argparse
Required Imports
import os
import json
import uuid
from datetime import datetime
from typing import Dict, List, Any, Optional
from models import TextSection, Document, DocumentSection, TextSectionVersion, ChatConfiguration, ChatMessage, SectionType, ContentStatus, DatabaseManager
from services import TextSectionService, DocumentService
import argparse
Usage Example
# Basic migration from legacy system: point the migrator at the legacy
# JSON directory and at the target database file.
migrator = LegacySystemMigrator(
    legacy_data_dir='/path/to/legacy/documents',
    db_path='new_documents.db',
)

# Run the migration and summarize the returned report
report = migrator.run_migration()
print(f"Migration success: {report['success']}")
print(f"Documents migrated: {report['documents_migrated']}")
print(f"Text sections created: {report['text_sections_created']}")

# Verify migration results by counting what ended up in the database
check = migrator.verify_migration()
print(f"Database tables exist: {check['database_tables_exist']}")
print(f"Total sections: {check['text_sections_count']}")

# Or create sample data for testing (no legacy files are read in this mode)
sample_migrator = LegacySystemMigrator(legacy_data_dir='/tmp', db_path='test.db')
sample = sample_migrator.create_sample_data()
print(f"Created {len(sample['documents'])} sample documents")
Best Practices
- Always call run_migration() or create_sample_data() before verify_migration() to ensure database is initialized
- The migrator automatically initializes the database schema via db_manager.init_database()
- Migration is not idempotent - running multiple times will create duplicate entries. Clear the database between runs if needed
- Check the 'errors' and 'warnings' lists in the migration report to identify problematic legacy documents
- The migrator handles multiple legacy JSON formats with fallback field names (e.g., 'content' or 'text', 'title' or 'name')
- Failed section migrations are logged as warnings but don't stop document migration - partial documents may result
- Chat configurations and history are optional - sections without them will still migrate successfully
- Use verify_migration() after migration to ensure data integrity and completeness
- The class maintains references to service objects (document_service, text_section_service) throughout its lifetime
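- Most database operations are performed through the service layer. The exceptions are schema initialization and verification, which call DatabaseManager directly (init_database, verify_tables, get_all_documents, get_all_text_sections), and saving tags/metadata, which goes through text_section_service.db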
- Legacy JSON files must be valid JSON - malformed files will cause migration errors for that document
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
- function main_v13 68.5% similar
- function migrate_from_legacy 60.2% similar
- class TextSectionService 54.6% similar
- class TextSection 54.1% similar
- class DocumentSection 51.6% similar