🔍 Code Extractor

class DocumentService

Maturity: 51

Service class for managing Document entities, including creation, retrieval, section management, versioning, and duplication operations.

File:
/tf/active/vicechatdev/vice_ai/services.py
Lines:
406 - 818
Complexity:
complex

Purpose

DocumentService provides a comprehensive interface for document lifecycle management in a document management system. It handles document CRUD operations, manages relationships between documents and their sections (both text and data sections), supports document versioning and history tracking, enables document duplication with or without copying sections, and provides utilities for tracking section usage across documents. The service acts as a facade coordinating between the database layer and other section services.

Source Code

class DocumentService:
    """Service for managing Documents.

    Wraps a database manager together with the text- and data-section
    services to create, fetch, delete, duplicate and version documents, and
    to add, remove and reorder the section references a document holds.
    """
    
    def __init__(self, db_manager: DatabaseManager, text_section_service: TextSectionService, data_section_service: DataSectionService):
        self.db = db_manager
        self.text_section_service = text_section_service
        self.data_section_service = data_section_service
    
    def create_document(self, owner: str, title: str, description: str = "") -> Document:
        """Build a Document with a freshly generated UUID and persist it.

        Args:
            owner: Owner identifier stored on the document.
            title: Document title.
            description: Optional description; defaults to an empty string.

        Returns:
            The newly constructed Document. The result of the save call is
            not inspected.
        """
        new_doc = Document(
            id=str(uuid.uuid4()),
            owner=owner,
            title=title,
            description=description,
        )
        self.db.save_document(new_doc)
        return new_doc
    
    def get_document(self, document_id: str) -> Optional[Document]:
        """Get a document by ID"""
        return self.db.get_document(document_id)
    
    def get_user_documents(self, owner: str) -> List[Document]:
        """Get all documents for a user"""
        return self.db.get_user_documents(owner)
    
    def add_text_section_to_document(
        self, 
        document_id: str, 
        text_section_id: str, 
        position: int = -1,
        parent_id: Optional[str] = None,
        is_copy: bool = False
    ) -> bool:
        """Add a text section reference to a document.

        Args:
            document_id: ID of the document receiving the reference.
            text_section_id: ID of the text section to reference; it must
                exist according to the text section service.
            position: Index at which to insert the reference. ``-1`` (the
                default) or ``None`` appends. Any other value is clamped to
                ``[0, len(document.sections)]``.
            parent_id: Optional parent identifier stored on the reference.
            is_copy: Flag stored on the reference (True when the referenced
                text section was duplicated for this document).

        Returns:
            The result of the last save attempted: a falsy value if the
            document or text section does not exist or saving the document
            failed, otherwise the result of saving the new section row.
        """
        document = self.get_document(document_id)
        if not document:
            return False
        
        # Verify text section exists
        text_section = self.text_section_service.get_text_section(text_section_id)
        if not text_section:
            return False
        
        # Resolve the real insertion index up front. Previously an explicit
        # position past the end was stored verbatim while list.insert silently
        # appended, so the stored position no longer matched the list index.
        section_count = len(document.sections)
        if position is None or position == -1:
            insert_at = section_count
        else:
            insert_at = max(0, min(position, section_count))
        
        # Create document section reference
        doc_section = DocumentSection(
            id=str(uuid.uuid4()),
            section_id=text_section_id,
            section_type=SectionType.TEXT,
            position=insert_at,
            parent_id=parent_id,
            is_copy=is_copy
        )
        
        document.sections.insert(insert_at, doc_section)
        
        # Renumber every section so that position always equals list index,
        # matching what the remove/move operations of this service do.
        for index, existing in enumerate(document.sections):
            existing.position = index
        
        document.updated_at = datetime.now()
        
        # Save both to document JSON sections AND to document_sections table
        success = self.db.save_document(document)
        if success:
            # Also save to document_sections table for querying
            success = self.db.save_document_section(doc_section, document_id)
        
        return success
    
    def add_section_to_document(self, document_id: str, section_id: str, section_type: SectionType, position: Optional[int] = None) -> bool:
        """Add a text or data section reference to a document.

        Args:
            document_id: ID of the document receiving the reference.
            section_id: ID of the text or data section to reference.
            section_type: ``SectionType.TEXT`` or ``SectionType.DATA``; any
                other value is rejected.
            position: Index at which to insert. ``None`` appends; other
                values are clamped to ``[0, len(document.sections)]``.

        Returns:
            A falsy value if the document or section does not exist, the
            section type is unsupported, the document already references
            ``section_id``, or saving the document failed. Otherwise the
            result of saving the new section row.
        """
        document = self.get_document(document_id)
        if not document:
            return False
        
        # Verify section exists based on type
        if section_type == SectionType.TEXT:
            # Text sections are owned by the text section service; this class
            # has no get_text_section method of its own, so the lookup must
            # go through the service.
            section = self.text_section_service.get_text_section(section_id)
        elif section_type == SectionType.DATA:
            section = self.data_section_service.get_data_section(section_id)
        else:
            return False
        
        if not section:
            return False
        
        # Reject a second reference to the same underlying section
        existing_section = next((s for s in document.sections if s.section_id == section_id), None)
        if existing_section:
            return False  # Section already exists
        
        # Determine position
        if position is None:
            position = len(document.sections)
        else:
            position = max(0, min(position, len(document.sections)))
        
        # Create document section
        doc_section = DocumentSection(
            id=str(uuid.uuid4()),
            section_id=section_id,
            section_type=section_type,
            position=position
        )
        
        # Insert at position and renumber so position equals list index
        document.sections.insert(position, doc_section)
        for index, existing in enumerate(document.sections):
            existing.position = index
        
        document.updated_at = datetime.now()
        
        # Save to database
        success = self.db.save_document(document)
        if success:
            success = self.db.save_document_section(doc_section, document_id)
        
        return success
    
    def remove_section_from_document(self, document_id: str, doc_section_id: str) -> bool:
        """Remove a section reference from a document"""
        document = self.get_document(document_id)
        if not document:
            return False
        
        # Find and remove the section
        document.sections = [s for s in document.sections if s.id != doc_section_id]
        
        # Update positions
        for i, section in enumerate(document.sections):
            section.position = i
        
        document.updated_at = datetime.now()
        return self.db.save_document(document)
    
    def move_section_in_document(
        self, 
        document_id: str, 
        doc_section_id: str, 
        new_position: int
    ) -> bool:
        """Move a section to a new position in the document"""
        document = self.get_document(document_id)
        if not document:
            return False
        
        # Find the section
        section_to_move = next((s for s in document.sections if s.id == doc_section_id), None)
        if not section_to_move:
            return False
        
        # Remove from current position
        document.sections.remove(section_to_move)
        
        # Insert at new position
        document.sections.insert(new_position, section_to_move)
        
        # Update all positions
        for i, section in enumerate(document.sections):
            section.position = i
        
        document.updated_at = datetime.now()
        return self.db.save_document(document)
    
    def get_document_with_text_sections(self, document_id: str) -> Optional[Tuple[Document, List[TextSection]]]:
        """Get document with all referenced text sections loaded"""
        document = self.get_document(document_id)
        if not document:
            return None
        
        text_sections = []
        for doc_section in document.sections:
            # Only get text sections, skip data sections
            if doc_section.section_type == SectionType.TEXT:
                text_section = self.text_section_service.get_text_section(doc_section.section_id)
                if text_section:
                    text_sections.append(text_section)
        
        return document, text_sections
    
    def get_document_with_all_sections(self, document_id: str) -> Optional[Tuple[Document, List[TextSection], List[DataSection]]]:
        """Get document with all referenced sections (both text and data) loaded"""
        document = self.get_document(document_id)
        if not document:
            return None
        
        text_sections = []
        data_sections = []
        
        for doc_section in document.sections:
            if doc_section.section_type == SectionType.TEXT:
                text_section = self.text_section_service.get_text_section(doc_section.section_id)
                if text_section:
                    text_sections.append(text_section)
            elif doc_section.section_type == SectionType.DATA:
                data_section = self.data_section_service.get_data_section(doc_section.section_id)
                if data_section:
                    data_sections.append(data_section)
        
        return document, text_sections, data_sections
    
    def get_document_references(self, document_id: str) -> List[Dict]:
        """Get all combined references from document's text sections"""
        document, text_sections = self.get_document_with_text_sections(document_id)
        if not document or not text_sections:
            return []
        
        all_references = []
        ref_counter = 1
        
        for text_section in text_sections:
            for ref in text_section.last_references:
                # Add reference with sequential numbering
                ref_copy = ref.copy()
                ref_copy['document_ref_number'] = ref_counter
                ref_copy['source_section'] = text_section.title
                all_references.append(ref_copy)
                ref_counter += 1
        
        return all_references
    
    def duplicate_document(self, document_id: str, new_owner: str, new_title: str = None) -> Optional[Document]:
        """Create a duplicate document with copied text sections"""
        print(f"🔄 Starting duplicate_document: {document_id} -> {new_title}")
        
        original_doc, text_sections = self.get_document_with_text_sections(document_id)
        if not original_doc:
            print(f"❌ Original document not found: {document_id}")
            return None
        
        print(f"✅ Found original document: {original_doc.title} with {len(text_sections)} sections")
        
        # Create new document
        new_document = self.create_document(
            owner=new_owner,
            title=new_title or f"Copy of {original_doc.title}",
            description=original_doc.description
        )
        
        print(f"✅ Created new document: {new_document.id}")
        
        # Duplicate all text sections and add to new document
        for i, (doc_section, text_section) in enumerate(zip(original_doc.sections, text_sections)):
            print(f"  Processing section {i}: {text_section.title}")
            
            # Duplicate the text section
            new_text_section = self.text_section_service.duplicate_text_section(
                text_section.id,
                new_owner,
                text_section.title
            )
            
            if new_text_section:
                print(f"    ✅ Duplicated text section: {new_text_section.id}")
                # Add to new document
                success = self.add_text_section_to_document(
                    new_document.id,
                    new_text_section.id,
                    i,
                    doc_section.parent_id,
                    True  # Mark as copy
                )
                print(f"    Add to document result: {success}")
            else:
                print(f"    ❌ Failed to duplicate text section: {text_section.id}")
        
        # Reload the document to include the newly added sections
        print(f"🔄 Reloading document...")
        updated_document = self.get_document(new_document.id)
        if updated_document:
            print(f"✅ Reloaded document with {len(updated_document.sections)} sections")
        else:
            print(f"❌ Failed to reload document")
        
        return updated_document if updated_document else new_document
    
    def duplicate_document_with_references(self, document_id: str, new_owner: str, new_title: str = None, new_description: str = None) -> Optional[Document]:
        """Create a duplicate document that references the same text sections (no copies)"""
        original_doc, text_sections = self.get_document_with_text_sections(document_id)
        if not original_doc:
            return None
        
        # Create new document
        new_document = self.create_document(
            owner=new_owner,
            title=new_title or f"Reference copy of {original_doc.title}",
            description=new_description or original_doc.description
        )
        
        # Add references to the same text sections (no duplication)
        for i, doc_section in enumerate(original_doc.sections):
            # Add reference to existing section based on type
            if doc_section.section_type == SectionType.TEXT:
                self.add_text_section_to_document(
                    new_document.id,
                    doc_section.section_id,  # Reference the same text section
                    i,
                    doc_section.parent_id,
                    False  # Not a copy, it's a reference
                )
            # TODO: Add support for data sections in document duplication
        
        # Reload the document to include the newly added sections
        updated_document = self.get_document(new_document.id)
        return updated_document if updated_document else new_document
    
    def get_text_section_usage(self, text_section_id: str, user_email: str) -> List[Dict]:
        """Get information about which documents use a specific text section"""
        user_documents = self.get_user_documents(user_email)
        usage_info = []
        
        for document in user_documents:
            # Check if this document uses the text section
            section_positions = []
            for i, doc_section in enumerate(document.sections):
                if (doc_section.section_type == SectionType.TEXT and 
                    doc_section.section_id == text_section_id):
                    section_positions.append(i)
            
            if section_positions:
                usage_info.append({
                    'document_id': document.id,
                    'document_title': document.title,
                    'document_description': document.description,
                    'positions': section_positions,
                    'total_sections': len(document.sections),
                    'created_at': document.created_at.isoformat(),
                    'updated_at': document.updated_at.isoformat()
                })
        
        return usage_info
    
    def delete_document(self, document_id: str, delete_text_sections: bool = False) -> bool:
        """Delete a document and optionally its text sections"""
        if delete_text_sections:
            # Get all text sections first
            document, text_sections = self.get_document_with_text_sections(document_id)
            if document and text_sections:
                # Delete all text sections that are copies or not used elsewhere
                for text_section in text_sections:
                    # Only delete if this is the only document using it
                    # (This would need additional logic to check references)
                    pass
        
        return self.db.delete_document(document_id)
    
    def create_document_version(self, document_id: str, author: str, change_summary: str = "") -> Optional[str]:
        """Create a new version of a document"""
        document = self.get_document(document_id)
        if not document:
            return None
        
        version_id = str(uuid.uuid4())
        
        # Import DocumentVersion here to avoid circular imports
        from models import DocumentVersion
        
        document_version = DocumentVersion(
            version_id=version_id,
            document_id=document_id,
            title=document.title,
            description=document.description,
            sections=[section.to_dict() for section in document.sections],
            author=author,
            change_summary=change_summary,
            version_number=document.version_number
        )
        
        # Save the version
        if self.db.save_document_version(document_version):
            # Update document's current version info
            document.current_version_id = version_id
            document.version_number += 1
            document.updated_at = datetime.now()
            self.db.save_document(document)
            return version_id
        return None
    
    def get_document_versions(self, document_id: str) -> List[Dict]:
        """Get all versions for a document"""
        return self.db.get_document_versions(document_id)
    
    def revert_document_to_version(self, document_id: str, version_id: str, author: str) -> bool:
        """Revert document to a specific version"""
        versions = self.get_document_versions(document_id)
        target_version = next((v for v in versions if v['version_id'] == version_id), None)
        
        if target_version:
            # Create a new version marking the revert
            self.create_document_version(
                document_id, 
                author, 
                f"Reverted to version {target_version['version_number']}"
            )
            
            # Update document with the target version's data
            document = self.get_document(document_id)
            if document:
                document.title = target_version['title']
                document.description = target_version['description']
                # Restore sections structure
                document.sections = [DocumentSection.from_dict(s) for s in target_version['sections']]
                document.updated_at = datetime.now()
                return self.db.save_document(document)
        return False

Parameters

Name Type Default Kind
bases - -

Parameter Details

db_manager: DatabaseManager instance that handles all database persistence operations for documents, sections, and versions. Required for all database interactions.

text_section_service: TextSectionService instance that manages text section operations. Used to verify text section existence, retrieve text sections, and duplicate them when needed.

data_section_service: DataSectionService instance that manages data section operations. Used to verify data section existence and retrieve data sections for documents.

Return Value

Constructor returns a DocumentService instance. Methods return various types: Document objects for creation/retrieval operations, bool for success/failure of operations, Optional[Document] for operations that may fail, List[Document] for multi-document queries, Tuple[Document, List[TextSection]] or Tuple[Document, List[TextSection], List[DataSection]] for document retrieval with loaded sections, List[Dict] for reference and usage information, and Optional[str] for version ID creation.

Class Interface

Methods

__init__(self, db_manager: DatabaseManager, text_section_service: TextSectionService, data_section_service: DataSectionService)

Purpose: Initialize the DocumentService with required dependencies

Parameters:

  • db_manager: DatabaseManager instance for database operations
  • text_section_service: TextSectionService instance for text section operations
  • data_section_service: DataSectionService instance for data section operations

Returns: None - constructor initializes instance

create_document(self, owner: str, title: str, description: str = '') -> Document

Purpose: Create a new document with generated UUID and save to database

Parameters:

  • owner: Email or identifier of the document owner
  • title: Title of the document
  • description: Optional description of the document (default empty string)

Returns: Document object with generated ID and timestamps

get_document(self, document_id: str) -> Optional[Document]

Purpose: Retrieve a document by its ID

Parameters:

  • document_id: UUID string of the document to retrieve

Returns: Document object if found, None if not found

get_user_documents(self, owner: str) -> List[Document]

Purpose: Retrieve all documents owned by a specific user

Parameters:

  • owner: Email or identifier of the document owner

Returns: List of Document objects owned by the user (empty list if none)

add_text_section_to_document(self, document_id: str, text_section_id: str, position: int = -1, parent_id: str = None, is_copy: bool = False) -> bool

Purpose: Add a text section reference to a document at specified position, verifying section exists first

Parameters:

  • document_id: UUID of the document to add section to
  • text_section_id: UUID of the text section to add
  • position: Position to insert section (-1 for append, default -1)
  • parent_id: Optional parent section ID for hierarchical structure
  • is_copy: Whether this is a copied section or reference (default False)

Returns: True if section added successfully, False if document or section not found or operation failed

add_section_to_document(self, document_id: str, section_id: str, section_type: SectionType, position: int = None) -> bool

Purpose: Add any type of section (text or data) to a document, preventing duplicates

Parameters:

  • document_id: UUID of the document
  • section_id: UUID of the section to add
  • section_type: SectionType enum indicating TEXT or DATA section
  • position: Optional position to insert (None for append)

Returns: True if added successfully, False if document/section not found, section already exists, or operation failed

remove_section_from_document(self, document_id: str, doc_section_id: str) -> bool

Purpose: Remove a section reference from a document and reindex remaining sections

Parameters:

  • document_id: UUID of the document
  • doc_section_id: UUID of the DocumentSection to remove (not the section_id)

Returns: True if the updated document was saved (this is also the case when no section matched doc_section_id), False if the document was not found or the save failed

move_section_in_document(self, document_id: str, doc_section_id: str, new_position: int) -> bool

Purpose: Move a section to a new position within the document and reindex all sections

Parameters:

  • document_id: UUID of the document
  • doc_section_id: UUID of the DocumentSection to move
  • new_position: New position index for the section

Returns: True if moved successfully, False if document or section not found or operation failed

get_document_with_text_sections(self, document_id: str) -> Optional[Tuple[Document, List[TextSection]]]

Purpose: Retrieve a document with all its text sections fully loaded (excludes data sections)

Parameters:

  • document_id: UUID of the document

Returns: Tuple of (Document, List[TextSection]) if found, None if document not found

get_document_with_all_sections(self, document_id: str) -> Optional[Tuple[Document, List[TextSection], List[DataSection]]]

Purpose: Retrieve a document with all text and data sections fully loaded

Parameters:

  • document_id: UUID of the document

Returns: Tuple of (Document, List[TextSection], List[DataSection]) if found, None if document not found

get_document_references(self, document_id: str) -> List[Dict]

Purpose: Aggregate all references from all text sections in a document with sequential numbering

Parameters:

  • document_id: UUID of the document

Returns: List of reference dictionaries with document_ref_number and source_section added (empty list if no references or document not found)

duplicate_document(self, document_id: str, new_owner: str, new_title: str = None) -> Optional[Document]

Purpose: Create a duplicate of a document in which each text section is deep-copied into a new, independent text section; data sections are not carried over to the copy

Parameters:

  • document_id: UUID of the document to duplicate
  • new_owner: Owner email for the new document
  • new_title: Optional title for new document (defaults to 'Copy of [original title]')

Returns: New Document object with copied sections, None if original document not found

duplicate_document_with_references(self, document_id: str, new_owner: str, new_title: str = None, new_description: str = None) -> Optional[Document]

Purpose: Create a duplicate document that references the same text sections without copying them; data sections are currently skipped (not yet supported)

Parameters:

  • document_id: UUID of the document to duplicate
  • new_owner: Owner email for the new document
  • new_title: Optional title (defaults to 'Reference copy of [original title]')
  • new_description: Optional description (defaults to original description)

Returns: New Document object with section references, None if original document not found

get_text_section_usage(self, text_section_id: str, user_email: str) -> List[Dict]

Purpose: Find all documents owned by a user that use a specific text section

Parameters:

  • text_section_id: UUID of the text section to search for
  • user_email: Email of the user whose documents to search

Returns: List of dictionaries containing document info and positions where section is used (empty list if not used)

delete_document(self, document_id: str, delete_text_sections: bool = False) -> bool

Purpose: Delete a document and optionally its associated text sections

Parameters:

  • document_id: UUID of the document to delete
  • delete_text_sections: Intended to also delete the document's text sections (default False). Currently a placeholder: no text sections are deleted regardless of this flag

Returns: True if deleted successfully, False otherwise

create_document_version(self, document_id: str, author: str, change_summary: str = '') -> Optional[str]

Purpose: Create a snapshot version of the current document state

Parameters:

  • document_id: UUID of the document to version
  • author: Email or identifier of the person creating the version
  • change_summary: Optional description of changes in this version

Returns: Version ID (UUID string) if successful, None if document not found or operation failed

get_document_versions(self, document_id: str) -> List[Dict]

Purpose: Retrieve all version history for a document

Parameters:

  • document_id: UUID of the document

Returns: List of version dictionaries with version metadata and content snapshots

revert_document_to_version(self, document_id: str, version_id: str, author: str) -> bool

Purpose: Restore a document to a previous version state, creating a new version entry for the revert

Parameters:

  • document_id: UUID of the document
  • version_id: UUID of the version to revert to
  • author: Email or identifier of the person performing the revert

Returns: True if reverted successfully, False if document or version not found or operation failed

Attributes

Name Type Description Scope
db DatabaseManager Database manager instance for all persistence operations instance
text_section_service TextSectionService Service for managing text section operations instance
data_section_service DataSectionService Service for managing data section operations instance

Dependencies

  • uuid
  • typing
  • datetime

Required Imports

import uuid
from typing import List, Dict, Optional, Any, Tuple
from datetime import datetime
from models import TextSection, DataSection, Document, DocumentSection, TextSectionVersion, ChatConfiguration, ChatMessage, ChatSession, SectionType, ContentStatus, DatabaseManager, DocumentVersion

Conditional/Optional Imports

These imports are only needed under specific conditions:

from models import DocumentVersion

Condition: only needed when using create_document_version method

Required (conditional)

Usage Example

# Initialize dependencies
db_manager = DatabaseManager(connection_string)
text_section_service = TextSectionService(db_manager)
data_section_service = DataSectionService(db_manager)

# Create service instance
doc_service = DocumentService(db_manager, text_section_service, data_section_service)

# Create a new document
document = doc_service.create_document(
    owner='user@example.com',
    title='My Research Paper',
    description='A comprehensive study'
)

# Add a text section to the document
success = doc_service.add_text_section_to_document(
    document_id=document.id,
    text_section_id='section-uuid',
    position=0
)

# Get document with all sections loaded
result = doc_service.get_document_with_all_sections(document.id)
if result:
    doc, text_sections, data_sections = result
    print(f'Document has {len(text_sections)} text sections')

# Create a version snapshot
version_id = doc_service.create_document_version(
    document_id=document.id,
    author='user@example.com',
    change_summary='Initial version'
)

# Duplicate document with copied sections
new_doc = doc_service.duplicate_document(
    document_id=document.id,
    new_owner='another@example.com',
    new_title='Copy of Research Paper'
)

# Get all documents for a user
user_docs = doc_service.get_user_documents('user@example.com')

Best Practices

  • Always initialize DocumentService with properly configured DatabaseManager, TextSectionService, and DataSectionService instances
  • Check return values for Optional types before using - many methods return None on failure
  • Use get_document_with_text_sections or get_document_with_all_sections when you need section content, not just document metadata
  • When adding sections, verify the section exists first or rely on the method's built-in verification
  • Position parameter of -1 in add_text_section_to_document appends to end; specific positions insert at that index
  • duplicate_document creates deep copies of sections (new section instances), while duplicate_document_with_references creates shallow copies (references to same sections)
  • Always create a version snapshot before making significant changes using create_document_version
  • When deleting documents, consider whether to delete associated text sections based on whether they're used elsewhere
  • Section positions are automatically recalculated when adding, removing, or moving sections
  • The service updates document.updated_at timestamp automatically on modifications
  • Use get_text_section_usage to check if a text section is used in multiple documents before deletion
  • Document sections are stored both in the document JSON and in a separate document_sections table for querying
  • The is_copy flag in DocumentSection indicates whether the section was duplicated or is a reference

Similar Components

AI-powered semantic similarity - components with related functionality:

  • class TextSectionService 78.1% similar

    Service class for managing TextSection entities, providing CRUD operations, versioning, chat functionality, and search capabilities.

    From: /tf/active/vicechatdev/vice_ai/services.py
  • class DataSectionService 69.1% similar

    Service class for managing DataSection entities, providing CRUD operations and specialized update methods for analysis sessions, plots, and conclusions.

    From: /tf/active/vicechatdev/vice_ai/services.py
  • class DocumentSection 68.1% similar

    A class representing a section within a complex document, supporting hierarchical structure with headers, text content, and references.

    From: /tf/active/vicechatdev/vice_ai/complex_app.py
  • class DocumentExportService 66.2% similar

    Service class for exporting documents in various formats with enhanced formatting, providing both full structured exports and content-only exports.

    From: /tf/active/vicechatdev/vice_ai/services.py
  • class Document 66.2% similar

    A dataclass representing a document with hierarchical structure, versioning, metadata, and collaboration features.

    From: /tf/active/vicechatdev/vice_ai/models.py
← Back to Browse