class SharePointClient
A SharePoint client class that provides methods for connecting to SharePoint sites, retrieving documents recursively, downloading file content, and managing document metadata using app-only authentication.
/tf/active/vicechatdev/SPFCsync/sharepoint_client.py
11 - 173
moderate
Purpose
This class serves as a comprehensive interface for interacting with SharePoint document libraries. It handles authentication using Azure AD app credentials, provides recursive document discovery across folder hierarchies, downloads file content, and retrieves detailed file metadata. Designed for syncing SharePoint documents with FileCloud or other document management systems, it includes connection testing, error handling, and logging capabilities.
Source Code
class SharePointClient:
    """
    SharePoint client for syncing documents with FileCloud.

    Wraps a ``ClientContext`` authenticated with app-only credentials and
    offers recursive document listing, in-memory file download and per-file
    metadata lookup. The connection is verified when the object is created.
    """

    # Library root that is stripped from file URLs to build 'relative_path'.
    _LIBRARY_ROOT = '/Shared Documents'
    # Folder names that are treated as system folders and never descended into.
    _SYSTEM_FOLDERS = ('Forms', 'Item')

    def __init__(self, site_url: str, client_id: str, client_secret: str):
        """
        Initialize SharePoint client with app-only authentication.

        Args:
            site_url: SharePoint site URL
            client_id: Azure AD app client ID
            client_secret: Azure AD app client secret

        Raises:
            ConnectionError: If the initial connection test fails.
        """
        self.site_url = site_url
        self.client_id = client_id
        self.client_secret = client_secret

        # Initialize context with app-only authentication
        self.ctx = ClientContext(site_url).with_credentials(
            ClientCredential(client_id, client_secret)
        )

        # Setup logging
        self.logger = logging.getLogger(__name__)

        # Fail fast: loading the web also caches the properties that
        # _relative_path() reads later.
        self._test_connection()

    def _test_connection(self) -> bool:
        """
        Test the SharePoint connection by loading the web object.

        Returns:
            True when the web could be loaded.

        Raises:
            ConnectionError: If loading the web fails for any reason; the
                original exception is chained as the cause.
        """
        try:
            web = self.ctx.web
            self.ctx.load(web)
            self.ctx.execute_query()
            self.logger.info(f"Successfully connected to SharePoint site: {web.properties['Title']}")
            return True
        except Exception as e:
            self.logger.error(f"Failed to connect to SharePoint: {e}")
            # Chain the cause so the underlying traceback is not lost.
            raise ConnectionError(f"SharePoint connection failed: {e}") from e

    @staticmethod
    def _base_file_info(props: Dict) -> Dict:
        """Return the metadata fields shared by listing and single-file lookup."""
        return {
            'name': props['Name'],
            'server_relative_url': props['ServerRelativeUrl'],
            'size': props['Length'],
            'modified': props['TimeLastModified'],
            'created': props['TimeCreated'],
        }

    @staticmethod
    def _author_title(props: Dict) -> str:
        """Return the author's title, or 'Unknown' when it is not available."""
        author = props.get('Author')
        # 'Author' may be missing, None or a non-dict value; only a dict can
        # carry a 'Title', everything else falls back to 'Unknown'.
        if isinstance(author, dict):
            return author.get('Title', 'Unknown')
        return 'Unknown'

    @staticmethod
    def _file_type(name: str) -> str:
        """Return the text after the last '.' in *name*, or '' if there is none."""
        return name.rsplit('.', 1)[-1] if '.' in name else ''

    def _relative_path(self, server_relative_url: str) -> str:
        """Return *server_relative_url* relative to the site's document library."""
        # A root site reports '/' as its URL; strip the trailing slash so the
        # prefix does not become '//Shared Documents'.
        web_url = self.ctx.web.properties['ServerRelativeUrl'].rstrip('/')
        prefix = web_url + self._LIBRARY_ROOT
        # Remove the prefix only at the start and only on a path boundary, so
        # look-alike names and nested occurrences are left untouched.
        if server_relative_url == prefix or server_relative_url.startswith(prefix + '/'):
            server_relative_url = server_relative_url[len(prefix):]
        return server_relative_url.lstrip('/')

    def get_all_documents(self, folder_path: str = "/Shared Documents") -> List[Dict]:
        """
        Get all documents from SharePoint recursively.

        Folders that fail to load are logged and skipped by the recursive
        helper, so the returned list may be incomplete.

        Args:
            folder_path: Server relative path to folder

        Returns:
            List of document information dictionaries
        """
        documents = []
        try:
            self._get_documents_recursive(folder_path, documents)
            self.logger.info(f"Retrieved {len(documents)} documents from SharePoint")
            return documents
        except Exception as e:
            self.logger.error(f"Error getting documents from {folder_path}: {e}")
            raise

    def _get_documents_recursive(self, folder_path: str, documents: List[Dict]):
        """
        Recursively get all documents from a folder and its subfolders.

        Args:
            folder_path: Server relative path to folder
            documents: List to append document information to
        """
        try:
            folder = self.ctx.web.get_folder_by_server_relative_url(folder_path)

            # Get files in current folder
            files = folder.files
            self.ctx.load(files)
            self.ctx.execute_query()

            for file in files:
                props = file.properties
                # Skip system files and hidden files
                if props['Name'].startswith(('.', '~')):
                    continue
                file_info = self._base_file_info(props)
                file_info['author'] = self._author_title(props)
                file_info['file_type'] = self._file_type(props['Name'])
                file_info['relative_path'] = self._relative_path(props['ServerRelativeUrl'])
                documents.append(file_info)

            # Get subfolders and process them recursively
            folders = folder.folders
            self.ctx.load(folders)
            self.ctx.execute_query()

            for subfolder in folders:
                subfolder_name = subfolder.properties['Name']
                # Skip system folders
                if subfolder_name.startswith('.') or subfolder_name in self._SYSTEM_FOLDERS:
                    continue
                self._get_documents_recursive(subfolder.properties['ServerRelativeUrl'], documents)
        except Exception as e:
            # Deliberately best-effort: continue processing other folders
            # even if one fails.
            self.logger.error(f"Error processing folder {folder_path}: {e}")

    def download_file_content(self, server_relative_url: str) -> Optional[bytes]:
        """
        Download file content from SharePoint.

        Args:
            server_relative_url: SharePoint server relative URL of file

        Returns:
            File content as bytes, or None if failed
        """
        try:
            file = self.ctx.web.get_file_by_server_relative_url(server_relative_url)
            # Download file content to memory
            content = file.get_content()
            self.ctx.execute_query()
            return content.value
        except Exception as e:
            self.logger.error(f"Error downloading file content {server_relative_url}: {e}")
            return None

    def get_file_info(self, server_relative_url: str) -> Optional[Dict]:
        """
        Get detailed information about a file.

        Args:
            server_relative_url: SharePoint server relative URL of file

        Returns:
            File information dictionary if successful, None otherwise
        """
        try:
            file = self.ctx.web.get_file_by_server_relative_url(server_relative_url)
            self.ctx.load(file)
            self.ctx.execute_query()

            props = file.properties
            file_info = self._base_file_info(props)
            file_info['version'] = props.get('UIVersionLabel', '1.0')
            file_info['author'] = self._author_title(props)
            file_info['file_type'] = self._file_type(props['Name'])
            return file_info
        except Exception as e:
            self.logger.error(f"Error getting file info for {server_relative_url}: {e}")
            return None
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| bases | - | - |
Parameter Details
site_url: The full URL of the SharePoint site to connect to (e.g., 'https://contoso.sharepoint.com/sites/mysite'). This is the base URL for all SharePoint operations.
client_id: The Azure AD application (app registration) client ID used for app-only authentication. This ID is obtained when registering an application in Azure AD.
client_secret: The Azure AD application client secret (password) used for app-only authentication. This secret is generated in the Azure AD app registration and should be kept secure.
Return Value
Instantiation returns a SharePointClient object with an authenticated connection to the specified SharePoint site. The object maintains a ClientContext (self.ctx) for executing SharePoint operations. Methods return: get_all_documents() returns a List[Dict] of document metadata; download_file_content() returns Optional[bytes] of file content; get_file_info() returns Optional[Dict] with detailed file information; _test_connection() returns bool indicating connection success.
Class Interface
Methods
__init__(self, site_url: str, client_id: str, client_secret: str)
Purpose: Initialize the SharePoint client with app-only authentication and test the connection
Parameters:
- site_url: SharePoint site URL to connect to
- client_id: Azure AD app client ID for authentication
- client_secret: Azure AD app client secret for authentication
Returns: None (constructor), but raises ConnectionError if connection test fails
_test_connection(self) -> bool
Purpose: Test the SharePoint connection by loading the web properties and verifying access
Returns: True if connection successful, raises ConnectionError if connection fails
get_all_documents(self, folder_path: str = '/Shared Documents') -> List[Dict]
Purpose: Retrieve all documents from a SharePoint folder and its subfolders recursively
Parameters:
folder_path: Server relative path to the folder to scan (default: '/Shared Documents')
Returns: List of dictionaries containing document metadata (name, server_relative_url, size, modified, created, author, file_type, relative_path)
_get_documents_recursive(self, folder_path: str, documents: List[Dict])
Purpose: Internal recursive helper method to traverse folder hierarchy and collect document information
Parameters:
- folder_path: Server relative path to the current folder being processed
- documents: List to append document information dictionaries to (modified in place)
Returns: None (modifies documents list in place)
download_file_content(self, server_relative_url: str) -> Optional[bytes]
Purpose: Download the binary content of a file from SharePoint into memory
Parameters:
server_relative_url: SharePoint server relative URL of the file to download
Returns: File content as bytes if successful, None if download fails
get_file_info(self, server_relative_url: str) -> Optional[Dict]
Purpose: Retrieve detailed metadata information about a specific file
Parameters:
server_relative_url: SharePoint server relative URL of the file
Returns: Dictionary with file information (name, server_relative_url, size, modified, created, version, author, file_type) if successful, None if retrieval fails
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
| site_url | str | The SharePoint site URL provided during initialization | instance |
| client_id | str | The Azure AD app client ID used for authentication | instance |
| client_secret | str | The Azure AD app client secret used for authentication | instance |
| ctx | ClientContext | The Office365 ClientContext object used for executing SharePoint operations, initialized with app-only credentials | instance |
| logger | logging.Logger | Logger instance for recording connection status, operations, and errors | instance |
Dependencies
- office365-rest-python-client
- logging
Required Imports
from office365.sharepoint.client_context import ClientContext
from office365.runtime.auth.client_credential import ClientCredential
from office365.sharepoint.files.file import File
from office365.sharepoint.folders.folder import Folder
import logging
from typing import List, Dict, Optional
Usage Example
import logging
import os

from sharepoint_client import SharePointClient

# Configure logging
logging.basicConfig(level=logging.INFO)

# Initialize the client. The constructor tests the connection and raises
# ConnectionError on failure, so handle it here. The secret is read from the
# environment instead of being hard-coded.
try:
    client = SharePointClient(
        site_url='https://contoso.sharepoint.com/sites/mysite',
        client_id='your-client-id',
        client_secret=os.environ['SHAREPOINT_CLIENT_SECRET'],
    )
except ConnectionError as exc:
    raise SystemExit(f"Could not connect to SharePoint: {exc}")

# Get all documents from default Shared Documents library
documents = client.get_all_documents()
for doc in documents:
    print(f"File: {doc['name']}, Size: {doc['size']} bytes")

# Get documents from specific folder
folder_docs = client.get_all_documents('/Shared Documents/Projects')

# Download a specific file; the result is None when the download failed
file_content = client.download_file_content('/sites/mysite/Shared Documents/report.pdf')
if file_content:
    # Content is bytes, so the local file must be opened in binary mode
    with open('local_report.pdf', 'wb') as f:
        f.write(file_content)

# Get detailed file information; the result is None when the lookup failed
file_info = client.get_file_info('/sites/mysite/Shared Documents/report.pdf')
if file_info:
    print(f"Version: {file_info['version']}, Modified: {file_info['modified']}")
Best Practices
- Always handle the ConnectionError that may be raised during instantiation if SharePoint connection fails
- Store client_secret securely using environment variables or secure credential storage, never hardcode it
- The class automatically tests the connection during initialization, so wrap instantiation in try-except blocks
- Use the default folder_path parameter ('/Shared Documents') for standard SharePoint document libraries
- The class skips system files (starting with '.' or '~') and system folders ('Forms', 'Item') automatically
- File content from download_file_content() is returned as bytes and should be written in binary mode
- The recursive document retrieval continues even if individual folders fail, so the returned list may be incomplete; check the logs for errors
- Ensure the Azure AD app has appropriate SharePoint permissions (Sites.Read.All minimum) before use
- The ctx (ClientContext) attribute is reused across method calls for efficiency
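- download_file_content() and get_file_info() return None on failure rather than raising exceptions, so always check their return values; initialization raises ConnectionError, and get_all_documents() returns a list that omits any folder that failed to load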
- The logger uses __name__ so configure logging at the module level for proper log attribution
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
- class SharePointRestClient 79.8% similar
- class SharePointFileCloudSync 74.7% similar
- class SharePointClient_v1 70.5% similar
- function test_rest_client 68.2% similar
- class SyncDiagnostics 66.6% similar