function get_documents_v1
Retrieves filtered and paginated documents from a Neo4j graph database with permission-based access control, supporting multiple filter criteria and search functionality.
/tf/active/vicechatdev/document_controller_backup.py
1955 - 2157
complex
Purpose
This function serves as the primary document retrieval interface for a document management system. It queries a Neo4j database for ControlledDocument nodes, applying various filters (type, status, department, date range, search text) and enforcing user permissions. It supports pagination and includes fallback error handling to ensure graceful degradation. The function is designed to handle both admin users (who see all documents) and regular users (who see only documents they have permission to access).
Source Code
def get_documents(
    user: DocUser,
    doc_type: Optional[str] = None,
    status: Optional[str] = None,
    department: Optional[str] = None,
    date_from: Optional[str] = None,
    date_to: Optional[str] = None,
    search_text: Optional[str] = None,
    include_content: bool = False,
    limit: int = 100,
    offset: int = 0
) -> Dict[str, Any]:
    """
    Get ControlledDocument nodes with optional filtering and pagination.

    Every response, successful or not, carries the same set of keys
    ('success', 'count', 'total', 'documents', 'has_more'), so callers can
    read them without guarding against a missing key.

    Args:
        user: User requesting the documents (its ``username`` and ``uid``
            attributes are read)
        doc_type: Filter by document type (``doc.docType``)
        status: Filter by document status (``doc.status``)
        department: Filter by department (``doc.department``)
        date_from: Filter by creation date (from); passed to Cypher
            ``datetime()``
        date_to: Filter by creation date (to); passed to Cypher
            ``datetime()``
        search_text: Substring to look for in the document title or
            document number (document content is NOT searched)
        include_content: Accepted for interface compatibility; currently
            not used by the query
        limit: Maximum number of documents to return
        offset: Offset for pagination

    Returns:
        Dictionary with keys:
            success: True when the main query completed
            count: number of documents in this page
            total: number of documents matching the filters
            documents: list of the returned document records
            has_more: True when further pages exist
            error: error message (only present when success is False)
    """
    logger.debug(f"Getting documents for user {user.username}")
    logger.debug(f"Filters - Type: {doc_type}, Status: {status}, Department: {department}")
    logger.debug(f"Search text: {search_text}")

    try:
        # Collect WHERE fragments; all user-supplied values go through
        # query parameters, never through string interpolation.
        where_filters = []
        params = {
            "limit": limit,
            "offset": offset
        }

        # Property names in the graph are camelCase (docType, createdDate).
        if doc_type:
            where_filters.append("doc.docType = $doc_type")
            params["doc_type"] = doc_type

        if status:
            where_filters.append("doc.status = $status")
            params["status"] = status

        if department:
            where_filters.append("doc.department = $department")
            params["department"] = department

        if date_from:
            where_filters.append("doc.createdDate >= datetime($date_from)")
            params["date_from"] = date_from

        if date_to:
            where_filters.append("doc.createdDate <= datetime($date_to)")
            params["date_to"] = date_to

        # Parenthesised so the OR cannot leak into the AND-joined filters.
        if search_text:
            where_filters.append(
                "(doc.title CONTAINS $search_text OR "
                "doc.docNumber CONTAINS $search_text)"
            )
            params["search_text"] = search_text

        # User UID parameter for the permission checks
        params["user_uid"] = user.uid

        # TODO(review): permission lookup is not implemented yet. Every user
        # is treated as an admin, so the restricted branch below never runs.
        is_admin = True

        filter_clause = " AND ".join(where_filters) if where_filters else "1=1"

        # The MATCH/WHERE part is shared by the page query and the count
        # query so the two can never drift apart.
        if is_admin:
            # Admins can see all documents
            match_clause = f"""
                MATCH (doc:ControlledDocument)
                WHERE {filter_clause}
            """
        else:
            # Regular users: restrict to documents they may access.
            # NOTE(review): the trailing "OR true" makes this restriction a
            # no-op; it is kept as in the original placeholder logic.
            match_clause = f"""
                MATCH (doc:ControlledDocument)
                WHERE {filter_clause}
                AND (
                    // Documents created by this user
                    doc.creatorUID = $user_uid
                    OR
                    // Documents with public access
                    doc.isPublic = true
                    OR
                    // For now, assuming all docs visible to user
                    true
                )
            """

        query = f"""{match_clause}
                RETURN doc as document
                ORDER BY doc.modifiedDate DESC
                SKIP $offset LIMIT $limit
            """
        count_query = f"""{match_clause}
                RETURN count(doc) as total
            """

        # Execute query
        logger.debug(f"Executing document query: {query}")
        logger.debug(f"With parameters: {params}")
        result = db.run_query(query, params)

        # An empty page is not special-cased: it still needs 'total' and
        # 'has_more' (e.g. when offset points past the last document).
        documents = [record["document"] for record in result] if result else []

        # Get total count
        count_result = db.run_query(count_query, params)
        total_count = count_result[0]["total"] if count_result else len(documents)

        return {
            "success": True,
            "count": len(documents),
            "total": total_count,
            "documents": documents,
            "has_more": (offset + len(documents) < total_count)
        }

    except Exception as e:
        logger.error(f"Error getting documents: {e}")
        import traceback
        logger.error(traceback.format_exc())
        error_message = str(e)

        # In case of database error, provide a fallback using a very simple query.
        # NOTE(review): this fallback ignores all filters and permission
        # checks; callers must inspect 'success' before trusting 'documents'.
        try:
            simple_query = """
                MATCH (doc:ControlledDocument)
                RETURN doc as document
                LIMIT 10
            """
            simple_result = db.run_query(simple_query)
            docs = [record["document"] for record in simple_result] if simple_result else []

            return {
                "success": False,
                "error": error_message,
                "documents": docs,
                "count": len(docs),
                "total": len(docs),
                "has_more": False
            }
        except Exception as fallback_error:
            # Last resort - return empty result with error
            logger.error(f"Fallback error: {fallback_error}")
            return {
                "success": False,
                "error": error_message,
                "documents": [],
                "count": 0,
                "total": 0,
                "has_more": False
            }
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| user | DocUser | - | positional_or_keyword |
| doc_type | Optional[str] | None | positional_or_keyword |
| status | Optional[str] | None | positional_or_keyword |
| department | Optional[str] | None | positional_or_keyword |
| date_from | Optional[str] | None | positional_or_keyword |
| date_to | Optional[str] | None | positional_or_keyword |
| search_text | Optional[str] | None | positional_or_keyword |
| include_content | bool | False | positional_or_keyword |
| limit | int | 100 | positional_or_keyword |
| offset | int | 0 | positional_or_keyword |
Parameter Details
user: DocUser object representing the authenticated user making the request. Must have 'username' and 'uid' attributes for logging and permission checks.
doc_type: Optional string to filter documents by their docType property. Pass None to skip this filter. Examples: 'SOP', 'Policy', 'Procedure'.
status: Optional string to filter documents by their status property. Pass None to skip this filter. Examples: 'Draft', 'Published', 'Archived'.
department: Optional string to filter documents by their department property. Pass None to skip this filter. Examples: 'Engineering', 'HR', 'Finance'.
date_from: Optional ISO format date string (YYYY-MM-DD) to filter documents created on or after this date. Uses the createdDate property.
date_to: Optional ISO format date string (YYYY-MM-DD) to filter documents created on or before this date. Uses the createdDate property.
search_text: Optional string to search within document title and docNumber properties. Uses case-sensitive CONTAINS matching.
include_content: Boolean flag indicating whether to include document content in results. Currently not implemented in the query logic.
limit: Integer specifying maximum number of documents to return per page. Default is 100. Used for pagination.
offset: Integer specifying the number of documents to skip. Default is 0. Used for pagination to calculate page position.
Return Value
Type: Dict[str, Any]
Returns a dictionary with keys: 'success' (bool indicating query success), 'count' (int of documents returned in current page), 'total' (int of total matching documents), 'documents' (list of document node dictionaries with properties like docType, status, title, createdDate, modifiedDate, etc.), 'has_more' (bool indicating if more pages exist), and optionally 'error' (string with error message if success is False). On error, returns partial results with available documents or empty list.
Dependencies
logging, typing, CDocs.db, CDocs.models.user_extensions
Required Imports
from typing import Dict, Any, Optional
from CDocs.models.user_extensions import DocUser
from CDocs import db
import logging
Conditional/Optional Imports
These imports are only needed under specific conditions:
import traceback
Condition: Used in exception handling blocks for detailed error logging
Required (conditional)
Usage Example
from typing import Dict, Any
from CDocs.models.user_extensions import DocUser
from CDocs import db
import logging
# Setup logger
logger = logging.getLogger(__name__)

# Create a user object
user = DocUser(username='john.doe', uid='user-123')

# Get all documents for user with pagination.
# Optional keys are read with .get() so the example does not raise KeyError
# when a response does not carry them.
result = get_documents(user=user, limit=50, offset=0)
print(f"Found {result.get('total', 0)} documents, showing {result['count']}")

# Filter by document type and status
result = get_documents(
    user=user,
    doc_type='SOP',
    status='Published',
    limit=20
)

# Search with date range
result = get_documents(
    user=user,
    search_text='quality',
    date_from='2024-01-01',
    date_to='2024-12-31',
    department='Engineering'
)

# Process results: always check 'success' first, because a failed query may
# still return fallback documents alongside an 'error' message.
if result['success']:
    for doc in result['documents']:
        print(f"Document: {doc.get('title')} - Status: {doc.get('status')}")
    if result.get('has_more'):
        print("More documents available on next page")
else:
    print(f"Error: {result.get('error')}")
Best Practices
- Always pass a valid DocUser object with uid and username attributes
- Use pagination (limit/offset) for large result sets to avoid memory issues
- Date filters should be in ISO format (YYYY-MM-DD) for proper datetime comparison
- The function has a hardcoded 'is_admin = True' which means all users currently have admin access - this should be replaced with actual permission checking logic
- Search text uses CONTAINS which is case-sensitive in Neo4j - consider using case-insensitive matching for better UX
- The include_content parameter is not currently implemented in the query logic
- Function includes fallback error handling that returns partial results - check 'success' field to determine if query completed normally
- The function logs extensively - ensure logger is properly configured to avoid performance issues
- Property names in Neo4j use camelCase (docType, createdDate, modifiedDate) not snake_case
- Total count is calculated with a separate query which may have performance implications on large datasets
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
-
function get_documents 96.5% similar
-
function search_documents_v1 83.8% similar
-
function search_documents 82.2% similar
-
function get_all_documents 76.7% similar
-
function search_documents_in_filecloud 65.7% similar