function archive_document
Archives a controlled document by changing its status and all versions to ARCHIVED, moving the published PDF to an archived location with metadata, and logging the archival event.
/tf/active/vicechatdev/CDocs/controllers/document_controller.py
2140 - 2359
complex
Purpose
This function handles the complete archival workflow for controlled documents in a document management system. It validates permissions and business rules, updates document and version statuses, manages PDF file relocation in FileCloud storage, adds comprehensive metadata to archived files, logs audit trails, and sends notifications. It's designed for document lifecycle management where documents need to be formally archived while maintaining traceability and compliance requirements.
Source Code
def _archive_published_pdf(
    user,
    document,
    current_version,
    document_uid,
    archive_reason,
    archive_comment,
    archived_at,
):
    """
    Copy the published PDF to an ``_archived`` path, tag it, and delete the original.

    This is a best-effort operation: every failure is logged as a warning and
    never propagates to the caller.

    Args:
        user: User performing the archive (``username`` is written to the file metadata)
        document: Document being archived
        current_version: Version whose ``pdf_file_path`` is archived
        document_uid: UID passed to ``download_document_from_filecloud``
        archive_reason: Reason for archiving
        archive_comment: Optional comment about archiving
        archived_at: ISO timestamp string stored as ``archived_date``

    Returns:
        The archived PDF path if the upload succeeded, otherwise None.
    """
    # Only set once the upload has actually succeeded, so callers never record
    # a path to a file that was not created.
    uploaded_path = None
    try:
        # Get FileCloud client
        client = get_filecloud_client()

        # Derive the archive path: same location, "_archived" before the extension
        original_pdf_path = current_version.pdf_file_path
        stem, extension = os.path.splitext(original_pdf_path)
        target_path = f"{stem}_archived{extension}"

        file_content = download_document_from_filecloud(
            document_uid=document_uid,
            user=None  # System operation
        )
        if not isinstance(file_content, bytes):
            logger.warning("Could not download PDF content for archiving")
            return None

        # delete=False because the file is closed first and then handed to the
        # client by path; it is removed explicitly in the finally below.
        temp_file = tempfile.NamedTemporaryFile(suffix='.pdf', delete=False)
        temp_path = temp_file.name
        try:
            with temp_file:
                temp_file.write(file_content)

            # Upload archived version
            result = client.upload_file(
                local_file_path=temp_path,
                remote_path=os.path.dirname(target_path),
                filename=os.path.basename(target_path),
                overwrite=True
            )
            if not result.get('success', False):
                logger.warning(f"Failed to upload archived PDF: {result.get('message', 'Unknown error')}")
                return None

            uploaded_path = target_path
            logger.info(f"Successfully archived PDF to {target_path}")

            # Add metadata to the archived PDF, then remove the original
            try:
                archived_metadata = {
                    "cdoc_uid": document.uid,
                    "doc_number": document.doc_number,
                    "doc_uid": current_version.uid,
                    "version_number": current_version.version_number,
                    "doc_type": document.doc_type,
                    "department": document.department,
                    "is_cdoc": "true",
                    "status": STATUS_ARCHIVED,
                    "archive_reason": archive_reason,
                    "archive_comment": archive_comment or "",
                    "archived_by": user.username,
                    "archived_date": archived_at,
                    "owner": document.owner.name if document.owner else "Unknown"
                }

                # Import metadata catalog
                from CDocs.utils.metadata_catalog import MetadataCatalog
                catalog = MetadataCatalog(client, debug=True)
                catalog.initialize()

                # Add metadata set to archived PDF
                set_result = catalog.add_set_to_file_object(target_path, set_name="CDocs")
                if not set_result.get('success', False):
                    logger.warning(f"Failed to add metadata set to archived PDF: {set_result.get('message', 'Unknown error')}")

                # Save metadata values
                metadata_result = catalog.save_attribute_values_by_name(target_path, "CDocs", archived_metadata)
                if not metadata_result.get('success', False):
                    logger.warning(f"Failed to set metadata on archived PDF: {metadata_result.get('message', 'Unknown error')}")
                else:
                    logger.info(f"Added metadata to archived PDF at {target_path}")

                # Delete the original published PDF
                delete_result = client.delete_file(
                    path=os.path.dirname(original_pdf_path),
                    name=os.path.basename(original_pdf_path)
                )
                if not delete_result.get('success', False):
                    logger.warning(f"Failed to delete original published PDF: {delete_result.get('message', 'Unknown error')}")
                else:
                    logger.info(f"Successfully deleted original published PDF from {original_pdf_path}")
            except Exception as metadata_err:
                logger.warning(f"Error adding metadata to archived PDF: {metadata_err}")
        finally:
            # Clean up temporary file
            try:
                os.unlink(temp_path)
            except OSError as cleanup_err:
                logger.warning(f"Could not remove temporary file {temp_path}: {cleanup_err}")
    except Exception as e:
        logger.warning(f"Error archiving PDF: {e}")
    return uploaded_path


def archive_document(
    user: DocUser,
    document_uid: str,
    archive_reason: str,
    archive_comment: Optional[str] = None
) -> Dict[str, Any]:
    """
    Archive a document, changing its status and all versions to ARCHIVED.

    Moves the published PDF to an archived location and adds metadata. PDF
    handling is best-effort: if it fails, the document is still archived and
    ``archived_pdf_path`` is reported as None.

    Args:
        user: User archiving the document
        document_uid: UID of document to archive
        archive_reason: Reason for archiving (must be non-empty)
        archive_comment: Optional comment about archiving

    Returns:
        Dictionary with archive status: ``success``, ``document_uid``,
        ``document_number``, ``title``, ``previous_status``, ``new_status``,
        ``archive_reason``, ``archive_comment``, ``versions_archived``,
        ``archived_pdf_path`` and ``message``.

    Raises:
        ResourceNotFoundError: If document not found
        ValidationError: If validation fails
        PermissionError: If user doesn't have permission
        BusinessRuleError: If archiving is not allowed, or if any unexpected
            error occurs (the original exception is chained as the cause)
    """
    logger.info(f"Archiving document {document_uid} by user {user.username}")
    try:
        # Get document instance
        document = ControlledDocument(uid=document_uid)
        if not document:
            raise ResourceNotFoundError(f"Document not found: {document_uid}")

        # Check if document can be archived; other statuses need the force permission
        if document.status not in [STATUS_PUBLISHED, STATUS_EFFECTIVE]:
            logger.warning(f"Attempting to archive document with status {document.status}")
            if not permissions.user_has_permission(user, "FORCE_ARCHIVE_DOCUMENT"):
                raise BusinessRuleError(
                    f"Document must be in PUBLISHED or EFFECTIVE status to be archived. Current status: {document.status}"
                )

        # Validate archive reason
        if not archive_reason:
            raise ValidationError("Archive reason is required")

        # Store previous status for audit
        previous_status = document.status

        # One timestamp for the whole operation so the file metadata and the
        # document metadata agree on when the archive happened.
        archived_at = datetime.now().isoformat()

        # 1. Update document status to ARCHIVED
        document.status = STATUS_ARCHIVED

        # 2. Update all versions to ARCHIVED status
        all_versions = document.get_all_versions()
        for version in all_versions:
            version.status = STATUS_ARCHIVED
            version.save()
            logger.info(f"Updated version {version.uid} ({version.version_number}) to ARCHIVED status")

        # 3. Handle the published PDF - move it to archive location with metadata
        archived_pdf_path = None
        if previous_status in [STATUS_PUBLISHED, STATUS_EFFECTIVE]:
            current_version = document.current_version
            if current_version and current_version.pdf_file_path:
                archived_pdf_path = _archive_published_pdf(
                    user=user,
                    document=document,
                    current_version=current_version,
                    document_uid=document_uid,
                    archive_reason=archive_reason,
                    archive_comment=archive_comment,
                    archived_at=archived_at,
                )

        # 4. Add archive info to metadata
        archive_metadata = {
            'archived_by': user.username,
            'archived_date': archived_at,
            'archive_reason': archive_reason,
            'archive_comment': archive_comment or "",
            'archived_pdf_path': archived_pdf_path
        }

        # Update document metadata with archive info
        if not hasattr(document, 'metadata') or not document.metadata:
            document.metadata = archive_metadata
        else:
            document.metadata.update(archive_metadata)

        # Save changes
        document.save()

        # Log archive event
        audit_trail.log_document_lifecycle_event(
            event_type="DOCUMENT_ARCHIVED",
            user=user,
            document_uid=document_uid,
            details={
                "previous_status": previous_status,
                "archive_reason": archive_reason,
                "archive_comment": archive_comment,
                "versions_archived": len(all_versions),
                "archived_pdf_path": archived_pdf_path
            }
        )

        # Notify about archiving
        notifications.notify_document_update(document, "DOCUMENT_ARCHIVED")

        return {
            "success": True,
            "document_uid": document_uid,
            "document_number": document.doc_number,
            "title": document.title,
            "previous_status": previous_status,
            "new_status": STATUS_ARCHIVED,
            "archive_reason": archive_reason,
            "archive_comment": archive_comment,
            "versions_archived": len(all_versions),
            "archived_pdf_path": archived_pdf_path,
            "message": f"Document {document.doc_number} and all {len(all_versions)} versions archived successfully"
        }
    except (ResourceNotFoundError, ValidationError, PermissionError, BusinessRuleError):
        # Re-raise known errors
        raise
    except Exception as e:
        logger.error(f"Error archiving document {document_uid}: {e}")
        logger.error(traceback.format_exc())
        # Chain the cause so the original traceback stays attached
        raise BusinessRuleError(f"Failed to archive document: {e}") from e
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
user |
DocUser | - | positional_or_keyword |
document_uid |
str | - | positional_or_keyword |
archive_reason |
str | - | positional_or_keyword |
archive_comment |
Optional[str] | None | positional_or_keyword |
Parameter Details
user: DocUser object representing the user performing the archive operation. Used for permission checks, audit logging, and metadata tracking. Must have ARCHIVE_DOCUMENT permission (or FORCE_ARCHIVE_DOCUMENT for non-standard status documents).
document_uid: String containing the unique identifier (UID) of the document to be archived. Used to retrieve the ControlledDocument instance from the database.
archive_reason: Required string explaining why the document is being archived. Cannot be empty. Stored in metadata and audit logs for compliance and traceability.
archive_comment: Optional string providing additional context or notes about the archival. Defaults to None. Stored in metadata alongside the archive reason for future reference.
Return Value
Type: Dict[str, Any]
Returns a dictionary containing archive operation results with keys: 'success' (bool, always True on successful completion), 'document_uid' (str, UID of archived document), 'document_number' (str, document number), 'title' (str, document title), 'previous_status' (str, status before archiving), 'new_status' (str, STATUS_ARCHIVED), 'archive_reason' (str, provided reason), 'archive_comment' (str or None, returned exactly as passed in), 'versions_archived' (int, count of versions archived), 'archived_pdf_path' (str or None, path to archived PDF file), and 'message' (str, success message with details).
Dependencies
logging, os, tempfile, typing, datetime, traceback, CDocs.db, CDocs.config.settings, CDocs.config.permissions, CDocs.models.document, CDocs.models.user_extensions, CDocs.utils.notifications, CDocs.utils.audit_trail, CDocs.controllers, CDocs.controllers.filecloud_controller, CDocs.utils.metadata_catalog, CDocs.models.document_status
Required Imports
import logging
import os
import tempfile
import traceback
from typing import Dict, Any, Optional
from datetime import datetime
from CDocs.models.document import ControlledDocument
from CDocs.models.user_extensions import DocUser
from CDocs.utils import notifications, audit_trail
from CDocs.config import permissions
from CDocs.controllers import require_permission, log_controller_action, transaction
from CDocs.controllers import ResourceNotFoundError, ValidationError, PermissionError, BusinessRuleError
from CDocs.controllers.filecloud_controller import download_document_from_filecloud, get_filecloud_client
from CDocs.models.document_status import STATUS_PUBLISHED, STATUS_EFFECTIVE, STATUS_ARCHIVED
Conditional/Optional Imports
These imports are only needed under specific conditions:
from CDocs.utils.metadata_catalog import MetadataCatalog
Condition: only when archiving published/effective documents with PDFs to add metadata to archived PDF files
Required (conditional)
Usage Example
from CDocs.models.user_extensions import DocUser
from CDocs.controllers.document_controller import archive_document
# PermissionError is imported from CDocs.controllers on purpose: without it,
# the except clause below would refer to Python's builtin PermissionError.
from CDocs.controllers import (
    ResourceNotFoundError,
    ValidationError,
    PermissionError,
    BusinessRuleError,
)

# Get the user performing the archive
user = DocUser.get_by_username('john.doe')

# Archive a document with reason and comment
try:
    result = archive_document(
        user=user,
        document_uid='doc-12345-abcde',
        archive_reason='Product discontinued',
        archive_comment='Replaced by new version in separate document series'
    )
    print(f"Success: {result['message']}")
    print(f"Archived {result['versions_archived']} versions")
    print(f"Previous status: {result['previous_status']}")
    # May print None when no PDF was archived
    print(f"Archived PDF location: {result['archived_pdf_path']}")
except ResourceNotFoundError as e:
    print(f"Document not found: {e}")
except ValidationError as e:
    print(f"Validation error: {e}")
except PermissionError as e:
    print(f"Permission denied: {e}")
except BusinessRuleError as e:
    print(f"Business rule violation: {e}")
Best Practices
- Always provide a meaningful archive_reason as it's required for compliance and audit purposes
- Ensure the user has appropriate ARCHIVE_DOCUMENT permission before calling this function
- The function is decorated with @transaction, so database changes are atomic and will rollback on errors
- Only documents in PUBLISHED or EFFECTIVE status can be archived unless user has FORCE_ARCHIVE_DOCUMENT permission
- The function handles PDF file operations gracefully - if PDF archiving fails, it logs warnings but continues with document archival
- All versions of the document are archived together, not just the current version
- The original published PDF is deleted after successful archival to the new location
- Archived PDFs are renamed with '_archived' suffix before the file extension
- Comprehensive metadata is added to archived PDFs including archive reason, date, user, and document details
- The function sends notifications and logs audit events automatically
- Handle all four exception types (ResourceNotFoundError, ValidationError, PermissionError, BusinessRuleError) in calling code
- The function uses temporary files for PDF operations and cleans them up automatically
- Archive metadata is stored both in the document's metadata field and in FileCloud metadata catalog
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
-
function archive_document_v1 96.7% similar
-
function publish_document 72.5% similar
-
function publish_document_v1 71.2% similar
-
function delete_document 69.9% similar
-
function upload_document_to_filecloud 66.4% similar