function get_documents
Retrieves controlled documents from a Neo4j database with comprehensive filtering, permission-based access control, pagination, and case-insensitive substring search.
/tf/active/vicechatdev/CDocs/controllers/document_controller.py
2875 - 3176
complex
Purpose
This function serves as the primary document retrieval interface for a document management system. It queries a Neo4j graph database to fetch ControlledDocument nodes based on multiple optional filters (type, status, department, owner, date range, search text). It implements role-based access control, distinguishing between admin users who can view all documents and regular users who can only see documents they own, created, or have been shared with. The function includes robust error handling with fallback queries, date format normalization, and JSON-serializable output formatting.
Source Code
# Date-valued document properties that are converted to ISO strings so the
# returned dictionaries can be JSON-serialized.
_DOC_DATE_FIELDS = ('createdDate', 'modifiedDate', 'effectiveDate', 'reviewDate')

# Cypher predicate limiting results to documents the requesting user created,
# owns, has been shared with, or that are flagged public. Expects $user_uid.
_DOC_ACCESS_PREDICATE = """(
            doc.creatorUID = $user_uid
            OR doc.ownerUID = $user_uid
            OR doc.isPublic = true
            OR EXISTS {
                MATCH (doc)-[:SHARED_WITH]->(u:User {UID: $user_uid})
            }
        )"""


def _non_negative_int(value, default):
    """Coerce a pagination value to an int >= 0, or return ``default`` if it is not numeric."""
    try:
        number = int(value)
    except (TypeError, ValueError):
        return default
    return max(number, 0)


def _normalize_date_bound(value, default_time):
    """Return ``value`` as a string for Cypher ``datetime()``, or None if unusable.

    Strings are stripped; values without a 'T' separator get ``default_time``
    appended. Objects exposing ``isoformat()`` (e.g. ``date``/``datetime``) are
    converted the same way. The actual parsing is left to the database.
    """
    if isinstance(value, str):
        text = value.strip()
    elif hasattr(value, 'isoformat'):
        text = value.isoformat()
    else:
        return None
    if not text:
        return None
    return text if 'T' in text else f"{text}T{default_time}"


def _serialize_document(node):
    """Copy a document node into a dict with its date fields rendered as strings."""
    doc_data = dict(node)
    for field in _DOC_DATE_FIELDS:
        value = doc_data.get(field)
        if not value:
            continue
        try:
            if hasattr(value, 'isoformat'):
                doc_data[field] = value.isoformat()
            elif hasattr(value, 'iso_format'):
                doc_data[field] = value.iso_format()
        except Exception as date_conv_err:
            logger.warning(f"Error converting date field {field}: {date_conv_err}")
            doc_data[field] = str(value)
    return doc_data


def get_documents(
    user: DocUser,
    doc_type: Optional[str] = None,
    status: Optional[str] = None,
    department: Optional[str] = None,
    owner: Optional[str] = None,
    date_from: Optional[str] = None,
    date_to: Optional[str] = None,
    search_text: Optional[str] = None,
    include_content: bool = False,
    limit: int = 100,
    offset: int = 0
) -> Dict[str, Any]:
    """
    Get documents with optional filtering.

    Users holding the "VIEW_ALL_DOCUMENTS" permission see every matching
    document. All other users (including when the permission check itself
    fails) only see documents they created, own, were shared with, or that
    are marked public. The same restriction is applied to the error fallback.

    Args:
        user: User requesting the documents (its ``uid`` or ``UID`` attribute
            is used for the visibility check)
        doc_type: Filter by document type (code)
        status: Filter by document status (code)
        department: Filter by department (code)
        owner: Filter by document owner UID
        date_from: Filter by creation date (from) - ISO format string; a
            date-only value is treated as the start of that day
        date_to: Filter by creation date (to) - ISO format string; a
            date-only value is treated as the end of that day
        search_text: Case-insensitive substring searched in title, document
            number, or description
        include_content: Accepted for interface compatibility; not used by
            this function
        limit: Maximum number of documents to return (negative values are
            treated as 0, non-numeric values as 100)
        offset: Number of documents to skip (for pagination; negative or
            non-numeric values are treated as 0)

    Returns:
        Dictionary with keys ``success``, ``count``, ``total``, ``documents``
        and ``has_more``. Successful responses also carry ``filters_applied``;
        failed responses carry ``error`` and ``message`` and may contain up to
        10 recent documents visible to the user.
    """
    logger.debug(f"Getting documents for user {user.username if hasattr(user, 'username') else 'Unknown'}")
    logger.debug(f"Filters - Type: {doc_type}, Status: {status}, Department: {department}, Owner: {owner}")
    logger.debug(f"Date range: {date_from} to {date_to}, Search text: '{search_text}'")

    # Identity and privileges are resolved before the main try block so that
    # the error fallback below can enforce the same visibility rules.
    if hasattr(user, 'uid'):
        user_uid = user.uid
    elif hasattr(user, 'UID'):
        user_uid = user.UID
    else:
        logger.warning("User object missing UID attribute")
        user_uid = ""

    try:
        from CDocs.config import permissions
        is_admin = bool(permissions.user_has_permission(user, "VIEW_ALL_DOCUMENTS"))
    except Exception as perm_err:
        # Fail closed: an unavailable permission check must not grant access
        # to every document.
        is_admin = False
        logger.warning(f"Permission check failed, applying restricted access: {perm_err}")

    # SKIP/LIMIT reject negative or non-integer values, which would otherwise
    # fail the whole query.
    limit = _non_negative_int(limit, 100)
    offset = _non_negative_int(offset, 0)

    try:
        conditions = []
        params = {
            "limit": limit,
            "offset": offset,
            "user_uid": user_uid
        }

        # Exact-match filters: (parameter name, node property, raw value)
        for name, prop, raw in (
            ("doc_type", "docType", doc_type),
            ("status", "status", status),
            ("department", "department", department),
            ("owner", "ownerUID", owner),
        ):
            if raw and raw.strip():
                conditions.append(f"doc.{prop} = ${name}")
                params[name] = raw.strip()
                logger.debug(f"Added {name} filter: {raw}")

        # Creation-date bounds; unusable values are skipped with a warning
        # instead of failing the query.
        applied_dates = {"date_from": date_from, "date_to": date_to}
        for name, raw, operator, default_time in (
            ("date_from", date_from, ">=", "00:00:00"),
            ("date_to", date_to, "<=", "23:59:59"),
        ):
            if not raw:
                continue
            bound = _normalize_date_bound(raw, default_time)
            if bound is None:
                logger.warning(f"Invalid {name} format: {raw}")
                continue
            conditions.append(f"doc.createdDate {operator} datetime(${name})")
            params[name] = bound
            applied_dates[name] = bound
            logger.debug(f"Added {name} filter: {bound}")

        # Add search text filter - search in multiple fields
        if search_text and search_text.strip():
            search_term = search_text.strip()
            conditions.append("""
            (toLower(doc.title) CONTAINS toLower($search_text) OR
            toLower(doc.docNumber) CONTAINS toLower($search_text) OR
            toLower(doc.description) CONTAINS toLower($search_text))
            """)
            params["search_text"] = search_term
            logger.debug(f"Added search filter: {search_term}")

        # Regular users - filter by permissions
        if not is_admin:
            conditions.append(_DOC_ACCESS_PREDICATE)

        where_clause = " AND ".join(conditions) if conditions else "true"

        query = f"""
        MATCH (doc:ControlledDocument)
        WHERE {where_clause}
        RETURN doc
        ORDER BY doc.modifiedDate DESC, doc.createdDate DESC
        SKIP $offset LIMIT $limit
        """
        count_query = f"""
        MATCH (doc:ControlledDocument)
        WHERE {where_clause}
        RETURN count(doc) as total
        """

        # Execute query
        logger.debug(f"Executing document query: {query}")
        logger.debug(f"With parameters: {params}")
        result = db.run_query(query, params)
        documents = [_serialize_document(record["doc"]) for record in result] if result else []

        if not documents and offset == 0:
            # First page is empty, so nothing matches; skip the count query.
            logger.debug("No documents found")
            total_count = 0
        else:
            # Also counted for an empty page past the end, so callers still
            # receive the real total.
            known_minimum = offset + len(documents)
            try:
                count_result = db.run_query(count_query, params)
                total_count = count_result[0]["total"] if count_result else known_minimum
            except Exception as count_err:
                logger.warning(f"Error getting total count: {count_err}")
                total_count = known_minimum

        logger.debug(f"Found {len(documents)} documents out of {total_count} total")

        return {
            "success": True,
            "count": len(documents),
            "total": total_count,
            "documents": documents,
            "has_more": (offset + len(documents) < total_count),
            "filters_applied": {
                "doc_type": doc_type,
                "status": status,
                "department": department,
                "owner": owner,
                "date_from": applied_dates["date_from"],
                "date_to": applied_dates["date_to"],
                "search_text": search_text
            }
        }
    except Exception as e:
        logger.error(f"Error getting documents: {e}")
        logger.error(traceback.format_exc())
        # Fallback: try a simple query to at least return some documents,
        # still restricted to what this user is allowed to see.
        try:
            logger.debug("Attempting fallback query...")
            if is_admin:
                fallback_query = """
                MATCH (doc:ControlledDocument)
                RETURN doc
                ORDER BY doc.modifiedDate DESC
                LIMIT 10
                """
                fallback_result = db.run_query(fallback_query)
            else:
                fallback_query = f"""
                MATCH (doc:ControlledDocument)
                WHERE {_DOC_ACCESS_PREDICATE}
                RETURN doc
                ORDER BY doc.modifiedDate DESC
                LIMIT 10
                """
                fallback_result = db.run_query(fallback_query, {"user_uid": user_uid})

            fallback_docs = [
                _serialize_document(record["doc"]) for record in (fallback_result or [])
            ]
            logger.debug(f"Fallback query returned {len(fallback_docs)} documents")
            return {
                "success": False,
                "error": str(e),
                "documents": fallback_docs,
                "count": len(fallback_docs),
                "total": len(fallback_docs),
                "has_more": False,
                "message": "Error occurred during filtering, showing recent documents instead"
            }
        except Exception as fallback_error:
            logger.error(f"Fallback query also failed: {fallback_error}")
            return {
                "success": False,
                "error": str(e),
                "documents": [],
                "count": 0,
                "total": 0,
                "has_more": False,
                "message": "Unable to retrieve documents due to database error"
            }
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| user | DocUser | - | positional_or_keyword |
| doc_type | Optional[str] | None | positional_or_keyword |
| status | Optional[str] | None | positional_or_keyword |
| department | Optional[str] | None | positional_or_keyword |
| owner | Optional[str] | None | positional_or_keyword |
| date_from | Optional[str] | None | positional_or_keyword |
| date_to | Optional[str] | None | positional_or_keyword |
| search_text | Optional[str] | None | positional_or_keyword |
| include_content | bool | False | positional_or_keyword |
| limit | int | 100 | positional_or_keyword |
| offset | int | 0 | positional_or_keyword |
Parameter Details
user: DocUser object representing the authenticated user making the request. Must have either 'uid' or 'UID' attribute for permission checks and 'username' for logging. Used to determine access permissions and filter documents based on ownership/sharing.
doc_type: Optional string code to filter documents by their type (e.g., 'SOP', 'POLICY', 'PROCEDURE'). If provided, only documents with matching docType property are returned. Whitespace is stripped before comparison.
status: Optional string code to filter documents by their current status (e.g., 'DRAFT', 'PUBLISHED', 'ARCHIVED'). Matches against the document's status property. Whitespace is stripped.
department: Optional string code to filter documents by department ownership (e.g., 'HR', 'IT', 'FINANCE'). Matches the department property. Whitespace is stripped.
owner: Optional string UID to filter documents by their owner. Matches against the ownerUID property to find documents owned by a specific user.
date_from: Optional ISO format date string (YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS) to filter documents created on or after this date. Automatically appends T00:00:00 if time component is missing.
date_to: Optional ISO format date string (YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS) to filter documents created on or before this date. Automatically appends T23:59:59 if time component is missing.
search_text: Optional string used for a case-insensitive substring search across the document title, docNumber, and description fields. Uses the Cypher CONTAINS operator for partial matching; no full-text index is involved.
include_content: Boolean flag indicating whether to include the full document content in results. Currently not implemented in the query but reserved for future use. Default is False.
limit: Integer specifying maximum number of documents to return in a single query. Used for pagination. Default is 100. Must be positive.
offset: Integer specifying number of documents to skip before returning results. Used for pagination to fetch subsequent pages. Default is 0.
Return Value
Type: Dict[str, Any]
Returns a dictionary with keys: 'success' (bool indicating query success), 'count' (int of documents returned in the current page), 'total' (int of total matching documents), 'documents' (list of document dictionaries with all properties, date fields converted to ISO strings), and 'has_more' (bool indicating whether more pages exist). Successful responses also include 'filters_applied' (dict echoing the filter parameters used). Error responses instead include 'error' and 'message' strings; on error the function attempts a fallback query returning up to 10 recent documents.
Dependencies
logging, uuid, os, tempfile, typing, datetime, io, panel, shutil, traceback, json, re, random, CDocs.db, CDocs.config.settings, CDocs.config.permissions, CDocs.models.document, CDocs.models.user_extensions, CDocs.utils.document_processor, CDocs.utils.notifications, CDocs.utils.audit_trail, CDocs.db.schema_manager, CDocs.controllers, CDocs.models.approval, CDocs.controllers.filecloud_controller, CDocs.controllers.share_controller, CDocs.db.db_operations, CDocs.utils.document_converter, CDocs.models.document_status, CDocs.utils.filecloud_integration, CDocs.utils.notifications, CDocs.models.review, document_auditor.src.document_processor, CDocs.document_auditor.src.document_processor, CDocs.controllers.training_controller, CDocs.utils.metadata_catalog
Required Imports
import logging
import traceback
from typing import Dict, Any, Optional
from CDocs import db
from CDocs.models.user_extensions import DocUser
Conditional/Optional Imports
These imports are only needed under specific conditions:
from CDocs.config import permissions
Condition: Required for checking user admin permissions via permissions.user_has_permission(). If this import fails, function assumes admin permissions as fallback.
Optional
Usage Example
# Basic usage - get all documents for a user
from CDocs.models.user_extensions import DocUser
from CDocs.controllers.document_controller import get_documents
# Create user object
user = DocUser(uid='user123', username='john.doe')
# Get all documents with default pagination
result = get_documents(user)
print(f"Found {result['count']} documents out of {result['total']} total")
for doc in result['documents']:
print(f"- {doc['title']} ({doc['docNumber']})")
# Advanced filtering example
filtered_result = get_documents(
user=user,
doc_type='SOP',
status='PUBLISHED',
department='QA',
date_from='2024-01-01',
date_to='2024-12-31',
search_text='quality',
limit=50,
offset=0
)
if filtered_result['success']:
print(f"Filtered results: {filtered_result['count']} documents")
print(f"Has more pages: {filtered_result['has_more']}")
for doc in filtered_result['documents']:
print(f"{doc['docNumber']}: {doc['title']} - {doc['status']}")
else:
print(f"Error: {filtered_result.get('error', 'Unknown error')}")
# Pagination example
page_size = 20
page_num = 2
paginated_result = get_documents(
user=user,
limit=page_size,
offset=(page_num - 1) * page_size
)
Best Practices
- Always provide a valid DocUser object with uid/UID attribute to ensure proper permission checks
- Use pagination (limit/offset) when dealing with large document sets to avoid memory issues and improve performance
- Date filters should be in ISO format (YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS) for consistent behavior
- Check the 'success' field in the returned dictionary before processing results, as errors may return partial data via fallback query
- The function strips whitespace from filter parameters, so no need to pre-process filter strings
- Search text performs case-insensitive partial matching across multiple fields (title, docNumber, description)
- Admin users (with VIEW_ALL_DOCUMENTS permission) bypass ownership filters and see all documents
- Regular users only see documents they own, created, are shared with, or marked as public
- Use 'has_more' field to determine if additional pages exist for pagination UI
- Date fields in returned documents are converted to ISO format strings for JSON serialization compatibility
- The function includes comprehensive logging at debug level for troubleshooting filter application and query execution
- If the main query fails, a fallback query returns up to 10 recent documents to provide some user feedback
- The include_content parameter is currently not implemented but reserved for future functionality
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
- function get_documents_v1 96.5% similar
- function search_documents_v1 85.9% similar
- function search_documents 80.7% similar
- function get_all_documents 79.2% similar
- function search_documents_in_filecloud 67.3% similar