class OneDriveClient
A comprehensive Microsoft OneDrive client that uses the Microsoft Graph API to authenticate and perform file operations (upload, download, list, delete) on OneDrive storage.
/tf/active/vicechatdev/e-ink-llm/onedrive_client.py
20 - 507
complex
Purpose
This class provides a complete interface for interacting with Microsoft OneDrive through the Graph API. It handles authentication using MSAL (Microsoft Authentication Library) with support for both device code flow (interactive) and client credentials flow (service applications). The client manages token caching, automatic token refresh, folder creation, file uploads (including large file chunked uploads), downloads, listing, and deletion. It's designed for applications that need to integrate OneDrive storage capabilities, such as automated file synchronization, backup systems, or document processing pipelines.
Source Code
class OneDriveClient:
    """Microsoft OneDrive client using Graph API.

    Authentication goes through MSAL: the client credentials flow is used
    when a client secret is configured, the device code flow otherwise.
    On top of that the class offers folder creation plus file listing,
    download, upload (simple and chunked) and deletion.

    NOTE: the public methods are declared ``async`` but rely on the blocking
    ``requests`` and MSAL calls, so each call occupies the event loop until
    it completes.
    """

    # Files strictly smaller than this go through a single PUT; anything
    # else uses an upload session.
    _SIMPLE_UPLOAD_LIMIT = 4 * 1024 * 1024
    # Upload-session fragments are sent in multiples of 320 KiB;
    # 16 * 320 KiB = 5 MiB keeps the number of round trips reasonable.
    _UPLOAD_CHUNK_SIZE = 16 * 320 * 1024
    # Size of the pieces written to disk while streaming a download.
    _DOWNLOAD_CHUNK_SIZE = 1024 * 1024
    # Default socket timeout (seconds) for every HTTP call; without one,
    # ``requests`` waits indefinitely on a stalled connection.
    request_timeout = 30

    def __init__(self, config: Dict[str, Any]):
        """
        Initialize OneDrive client

        Args:
            config: Dictionary containing:
                - client_id: Azure App Registration client ID (required)
                - client_secret: Azure App Registration client secret
                  (optional for public clients)
                - tenant_id: Azure tenant ID (optional, defaults to 'common')
                - redirect_uri: Redirect URI for authentication (optional)
                - scopes: List of required scopes
                - user_principal_name: User whose drive is addressed when a
                  client secret is configured
                - token_cache_file: Path of the MSAL token cache file
                - request_timeout: HTTP timeout in seconds (optional)

        Raises:
            KeyError: If ``client_id`` is missing from ``config``.
        """
        self.config = config
        self.client_id = config['client_id']
        self.client_secret = config.get('client_secret')
        self.tenant_id = config.get('tenant_id', 'common')
        self.redirect_uri = config.get('redirect_uri', 'http://localhost:8080')
        self.scopes = config.get('scopes', ['https://graph.microsoft.com/Files.ReadWrite.All'])
        self.user_principal_name = config.get('user_principal_name')  # For client credentials flow
        self.request_timeout = config.get('request_timeout', self.request_timeout)

        # Token cache file
        self.token_cache_file = Path(config.get('token_cache_file', 'onedrive_token_cache.json'))

        # MSAL client app
        self.app = self._create_msal_app()

        # Current access token
        self.access_token = None
        self.token_expiry = None

        # Graph API base URL
        self.graph_url = "https://graph.microsoft.com/v1.0"

        # Determine drive endpoint based on authentication method: with a
        # client secret AND a user principal name the drive is addressed
        # through that user, otherwise through "me".
        if self.client_secret and self.user_principal_name:
            self.drive_endpoint = f"users/{self.user_principal_name}/drive"
        else:
            self.drive_endpoint = "me/drive"

        print("🔗 OneDrive client initialized")

    # ------------------------------------------------------------------
    # Small pure helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _split_path(folder_path: Optional[str]) -> List[str]:
        """Split a '/'-separated OneDrive path into its non-empty segments."""
        return [part for part in (folder_path or '').strip('/').split('/') if part]

    @staticmethod
    def _normalize_extensions(file_extensions: Optional[List[str]]):
        """Return the extensions as a lowercase set, or None when no filter applies."""
        if not file_extensions:
            return None
        return {ext.lower() for ext in file_extensions}

    @staticmethod
    def _quote_name(name: str) -> str:
        """Percent-encode an item name for use inside a Graph URL path."""
        from urllib.parse import quote

        # '/' stays unescaped (quote's default) so nested relative paths
        # keep working; characters such as '#', '?' and '%' are escaped.
        return quote(name)

    @staticmethod
    def _describe_error(result: Optional[Dict[str, Any]]) -> str:
        """Extract the error description from an MSAL result, tolerating None."""
        return (result or {}).get('error_description', 'Unknown error')

    # ------------------------------------------------------------------
    # Authentication
    # ------------------------------------------------------------------
    def _create_msal_app(self):
        """Create the MSAL application, loading the token cache file if it exists.

        Returns:
            A ``msal.ConfidentialClientApplication`` when a client secret is
            configured, otherwise a ``msal.PublicClientApplication``.
        """
        cache = msal.SerializableTokenCache()

        # Load existing token cache if available; a broken cache is only
        # reported, authentication then simply starts from scratch.
        if self.token_cache_file.exists():
            try:
                with open(self.token_cache_file, 'r') as f:
                    cache.deserialize(f.read())
            except Exception as e:
                print(f"⚠️ Could not load token cache: {e}")

        shared_kwargs = {
            'client_id': self.client_id,
            'authority': f"https://login.microsoftonline.com/{self.tenant_id}",
            'token_cache': cache,
        }
        if self.client_secret:
            # Confidential client app (with client secret)
            return msal.ConfidentialClientApplication(
                client_credential=self.client_secret,
                **shared_kwargs
            )
        # Public client app (device code flow)
        return msal.PublicClientApplication(**shared_kwargs)

    def _save_token_cache(self):
        """Write the token cache to ``token_cache_file`` if it changed (best effort)."""
        try:
            if self.app.token_cache.has_state_changed:
                # The cache contains tokens: make sure a newly created file
                # is readable by its owner only.
                self.token_cache_file.touch(mode=0o600, exist_ok=True)
                with open(self.token_cache_file, 'w') as f:
                    f.write(self.app.token_cache.serialize())
        except Exception as e:
            print(f"⚠️ Could not save token cache: {e}")

    def _store_token(self, result: Optional[Dict[str, Any]], label: str) -> bool:
        """Record the access token from an MSAL result.

        Args:
            result: Value returned by an MSAL ``acquire_token_*`` call.
            label: Flow name shown in the success message.

        Returns:
            True if ``result`` carried an access token, False otherwise.
        """
        if not result or "access_token" not in result:
            return False
        self.access_token = result["access_token"]
        self.token_expiry = datetime.now() + timedelta(seconds=result.get("expires_in", 3600))
        self._save_token_cache()
        print(f"✅ Authentication successful ({label})")
        return True

    def _authenticate_client_credentials(self) -> bool:
        """Acquire a token with the client credentials flow."""
        try:
            # For client credentials flow, scopes must end with /.default,
            # so the configured scopes are not used here.
            result = self.app.acquire_token_for_client(
                scopes=["https://graph.microsoft.com/.default"]
            )
            if self._store_token(result, "client credentials"):
                return True
            print(f"❌ Authentication failed: {self._describe_error(result)}")
            return False
        except Exception as e:
            print(f"❌ Authentication error: {e}")
            return False

    def _authenticate_device_flow(self) -> bool:
        """Acquire a token with the interactive device code flow."""
        try:
            flow = self.app.initiate_device_flow(scopes=self.scopes)
            if "user_code" not in flow:
                print("❌ Failed to create device flow")
                return False

            print(f"🔗 Please visit: {flow['verification_uri']}")
            print(f"📱 Enter code: {flow['user_code']}")
            print("⏳ Waiting for authentication...")

            result = self.app.acquire_token_by_device_flow(flow)
            if self._store_token(result, "device flow"):
                return True
            print(f"❌ Authentication failed: {self._describe_error(result)}")
            return False
        except Exception as e:
            print(f"❌ Authentication error: {e}")
            return False

    async def authenticate(self) -> bool:
        """Authenticate with Microsoft Graph API.

        A cached account is tried silently first; if that yields no token the
        client credentials flow (client secret configured) or the device code
        flow (no client secret) is used.

        Returns:
            True if an access token was obtained, False otherwise.
        """
        print("🔐 Authenticating with Microsoft Graph API...")

        # Try to get token silently first
        accounts = self.app.get_accounts()
        if accounts:
            try:
                result = self.app.acquire_token_silent(self.scopes, account=accounts[0])
                if self._store_token(result, "cached token"):
                    return True
            except Exception as e:
                print(f"⚠️ Silent authentication failed: {e}")

        if self.client_secret:
            # Client credentials flow (for service applications)
            return self._authenticate_client_credentials()
        # Device code flow (for interactive applications)
        return self._authenticate_device_flow()

    def _get_headers(self) -> Dict[str, str]:
        """Get headers for Graph API requests.

        Raises:
            ValueError: If no access token is available.
        """
        if not self.access_token:
            raise ValueError("Not authenticated - call authenticate() first")
        return {
            'Authorization': f'Bearer {self.access_token}',
            'Content-Type': 'application/json'
        }

    async def _ensure_authenticated(self):
        """Authenticate if there is no token or the recorded expiry has passed."""
        expired = bool(self.token_expiry) and datetime.now() >= self.token_expiry
        if not self.access_token or expired:
            await self.authenticate()

    # ------------------------------------------------------------------
    # Drive and folder operations
    # ------------------------------------------------------------------
    async def get_drive_info(self) -> Optional[Dict[str, Any]]:
        """Get information about the user's OneDrive.

        Returns:
            The decoded JSON response, or None if the request failed.
        """
        await self._ensure_authenticated()
        try:
            response = requests.get(
                f"{self.graph_url}/{self.drive_endpoint}",
                headers=self._get_headers(),
                timeout=self.request_timeout
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            print(f"❌ Failed to get drive info: {e}")
            return None

    def _iter_children(self, folder_id: str):
        """Yield every child item of a folder.

        Listings are paged; each page's ``@odata.nextLink`` is followed until
        no further page is announced. HTTP errors propagate to the caller.
        """
        url = f"{self.graph_url}/{self.drive_endpoint}/items/{folder_id}/children"
        while url:
            response = requests.get(
                url,
                headers=self._get_headers(),
                timeout=self.request_timeout
            )
            response.raise_for_status()
            page = response.json()
            yield from page.get('value', [])
            url = page.get('@odata.nextLink')

    async def create_folder(self, folder_path: str) -> Optional[str]:
        """
        Create a folder in OneDrive, creating missing parents on the way.

        Args:
            folder_path: Path like '/E-Ink LLM Input' or '/My Folder/Subfolder'

        Returns:
            Folder ID if successful, None otherwise (including for an empty path)
        """
        await self._ensure_authenticated()

        path_parts = self._split_path(folder_path)
        if not path_parts:
            return None

        current_parent = "root"
        try:
            for folder_name in path_parts:
                # Reuse the folder when it already exists.
                existing_folder = await self._find_item_in_folder(current_parent, folder_name, "folder")
                if existing_folder:
                    current_parent = existing_folder['id']
                    continue

                # NOTE: a failed lookup also ends up here (it returns None),
                # and "rename" then creates the folder under a new name.
                response = requests.post(
                    f"{self.graph_url}/{self.drive_endpoint}/items/{current_parent}/children",
                    headers=self._get_headers(),
                    json={
                        "name": folder_name,
                        "folder": {},
                        "@microsoft.graph.conflictBehavior": "rename"
                    },
                    timeout=self.request_timeout
                )
                response.raise_for_status()
                current_parent = response.json()['id']
                print(f"📁 Created OneDrive folder: {folder_name}")
            return current_parent
        except Exception as e:
            print(f"❌ Failed to create folder {folder_path}: {e}")
            return None

    async def _find_item_in_folder(self, folder_id: str, item_name: str, item_type: Optional[str] = None) -> Optional[Dict[str, Any]]:
        """Find an item (file or folder) by exact name in a specific folder.

        Args:
            folder_id: ID of the folder to search ("root" for the drive root).
            item_name: Name to match exactly.
            item_type: None for any item, "folder" or "file" to restrict the
                match to items carrying that facet.

        Returns:
            The matching item dictionary, or None when nothing matches or the
            lookup failed.
        """
        try:
            for item in self._iter_children(folder_id):
                if item['name'] != item_name:
                    continue
                # An item_type of None accepts anything; otherwise the facet
                # of that name must be present on the item.
                if item_type is None:
                    return item
                if item_type in ("folder", "file") and item_type in item:
                    return item
            return None
        except Exception as e:
            print(f"❌ Failed to find item {item_name}: {e}")
            return None

    async def get_folder_id(self, folder_path: str) -> Optional[str]:
        """Get folder ID by path, creating the folder if necessary.

        Returns:
            "root" for an empty path or '/', the folder ID otherwise, or None
            on failure.
        """
        if not folder_path or folder_path == '/':
            return "root"

        await self._ensure_authenticated()

        current_parent = "root"
        try:
            for folder_name in self._split_path(folder_path):
                folder = await self._find_item_in_folder(current_parent, folder_name, "folder")
                if not folder:
                    # Folder doesn't exist, create it
                    return await self.create_folder(folder_path)
                current_parent = folder['id']
            return current_parent
        except Exception as e:
            print(f"❌ Failed to get folder ID for {folder_path}: {e}")
            return None

    # ------------------------------------------------------------------
    # File operations
    # ------------------------------------------------------------------
    async def list_files_in_folder(self, folder_path: str, file_extensions: Optional[List[str]] = None) -> List[Dict[str, Any]]:
        """
        List files in a OneDrive folder (the folder is created if missing).

        Args:
            folder_path: Path like '/E-Ink LLM Input'
            file_extensions: List of extensions to filter by
                (e.g., ['.pdf', '.jpg']); matching ignores case

        Returns:
            List of dictionaries with the keys 'id', 'name', 'size',
            'modified', 'download_url' and 'path'; empty on failure
        """
        await self._ensure_authenticated()

        folder_id = await self.get_folder_id(folder_path)
        if not folder_id:
            return []

        wanted = self._normalize_extensions(file_extensions)
        path_prefix = folder_path.rstrip('/')
        try:
            files = []
            for item in self._iter_children(folder_id):
                if 'file' not in item:  # Folders carry no 'file' facet
                    continue
                if wanted is not None and Path(item['name']).suffix.lower() not in wanted:
                    continue
                files.append({
                    'id': item['id'],
                    'name': item['name'],
                    'size': item['size'],
                    'modified': item['lastModifiedDateTime'],
                    'download_url': item.get('@microsoft.graph.downloadUrl'),
                    'path': f"{path_prefix}/{item['name']}"
                })
            return files
        except Exception as e:
            print(f"❌ Failed to list files in {folder_path}: {e}")
            return []

    async def download_file(self, file_info: Dict[str, Any], local_path: str) -> bool:
        """
        Download a file from OneDrive

        Args:
            file_info: File information from list_files_in_folder
            local_path: Local path to save the file

        Returns:
            True if successful, False otherwise
        """
        display_name = file_info.get('name', file_info.get('id', '<unknown>'))
        try:
            download_url = file_info.get('download_url')
            if not download_url:
                # Get download URL if not provided
                await self._ensure_authenticated()
                response = requests.get(
                    f"{self.graph_url}/{self.drive_endpoint}/items/{file_info['id']}",
                    headers=self._get_headers(),
                    timeout=self.request_timeout
                )
                response.raise_for_status()
                download_url = response.json().get('@microsoft.graph.downloadUrl')

            if not download_url:
                print(f"❌ No download URL for {display_name}")
                return False

            local_file = Path(local_path)
            local_file.parent.mkdir(parents=True, exist_ok=True)

            # Stream into a sibling ".part" file and move it into place at
            # the end, so a failed transfer never leaves a truncated file
            # under the requested name.
            partial_file = local_file.with_name(local_file.name + '.part')
            try:
                with requests.get(download_url, stream=True, timeout=self.request_timeout) as response:
                    response.raise_for_status()
                    with open(partial_file, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=self._DOWNLOAD_CHUNK_SIZE):
                            f.write(chunk)
                partial_file.replace(local_file)
            finally:
                if partial_file.exists():
                    partial_file.unlink()

            print(f"📥 Downloaded: {display_name} -> {local_path}")
            return True
        except Exception as e:
            print(f"❌ Failed to download {display_name}: {e}")
            return False

    async def upload_file(self, local_path: str, onedrive_folder_path: str, filename: Optional[str] = None) -> bool:
        """
        Upload a file to OneDrive

        Files smaller than 4 MB are sent with a single request, larger ones
        through an upload session.

        Args:
            local_path: Path to local file
            onedrive_folder_path: OneDrive folder path like '/E-Ink LLM Output'
            filename: Optional filename override

        Returns:
            True if successful, False otherwise
        """
        await self._ensure_authenticated()

        local_file = Path(local_path)
        if not local_file.is_file():
            print(f"❌ Local file not found: {local_path}")
            return False

        upload_name = filename or local_file.name

        folder_id = await self.get_folder_id(onedrive_folder_path)
        if not folder_id:
            print(f"❌ Could not access OneDrive folder: {onedrive_folder_path}")
            return False

        try:
            if local_file.stat().st_size >= self._SIMPLE_UPLOAD_LIMIT:
                # For large files, use upload session
                return await self._upload_large_file(local_file, folder_id, upload_name)

            # Only the Authorization header is sent: the JSON content type
            # from _get_headers() would mislabel the file body. Going through
            # _get_headers() still fails fast when there is no token.
            auth_header = {'Authorization': self._get_headers()['Authorization']}
            with open(local_file, 'rb') as f:
                response = requests.put(
                    f"{self.graph_url}/{self.drive_endpoint}/items/{folder_id}:/{self._quote_name(upload_name)}:/content",
                    headers=auth_header,
                    data=f.read(),
                    timeout=self.request_timeout
                )
            response.raise_for_status()
            print(f"📤 Uploaded: {local_path} -> {onedrive_folder_path}/{upload_name}")
            return True
        except Exception as e:
            print(f"❌ Failed to upload {local_path}: {e}")
            return False

    def _cancel_upload_session(self, upload_url: str) -> None:
        """Best-effort cancellation of an upload session after a failure."""
        try:
            requests.delete(upload_url, timeout=self.request_timeout)
        except Exception as e:
            print(f"⚠️ Could not cancel upload session: {e}")

    async def _upload_large_file(self, local_file: Path, folder_id: str, filename: str) -> bool:
        """Upload a file through an upload session, one fragment at a time.

        Args:
            local_file: Local file to send.
            folder_id: ID of the destination folder.
            filename: Name of the item to create (an existing item is replaced).

        Returns:
            True if every byte was sent, False otherwise.
        """
        upload_url = None
        try:
            # Create upload session
            response = requests.post(
                f"{self.graph_url}/{self.drive_endpoint}/items/{folder_id}:/{self._quote_name(filename)}:/createUploadSession",
                headers=self._get_headers(),
                json={
                    "item": {
                        "@microsoft.graph.conflictBehavior": "replace",
                        "name": filename
                    }
                },
                timeout=self.request_timeout
            )
            response.raise_for_status()
            upload_url = response.json()['uploadUrl']

            file_size = local_file.stat().st_size
            uploaded = 0
            with open(local_file, 'rb') as f:
                while uploaded < file_size:
                    # Never read past the size announced in Content-Range,
                    # even if the file grew after stat().
                    chunk = f.read(min(self._UPLOAD_CHUNK_SIZE, file_size - uploaded))
                    if not chunk:
                        break

                    last_byte = uploaded + len(chunk) - 1
                    response = requests.put(
                        upload_url,
                        headers={
                            'Content-Range': f'bytes {uploaded}-{last_byte}/{file_size}',
                            'Content-Length': str(len(chunk))
                        },
                        data=chunk,
                        timeout=self.request_timeout
                    )
                    response.raise_for_status()

                    uploaded += len(chunk)
                    print(f"📤 Upload progress: {uploaded}/{file_size} bytes ({uploaded/file_size*100:.1f}%)")

            if uploaded < file_size:
                # The file shrank while it was being read: the session is
                # incomplete, so do not report success.
                raise IOError(f"file truncated during upload ({uploaded}/{file_size} bytes sent)")

            print(f"📤 Large file uploaded: {filename}")
            return True
        except Exception as e:
            print(f"❌ Failed to upload large file {filename}: {e}")
            if upload_url:
                self._cancel_upload_session(upload_url)
            return False

    async def delete_file(self, file_info: Dict[str, Any]) -> bool:
        """Delete a file from OneDrive.

        Args:
            file_info: Dictionary with at least the item's 'id'; 'name' is
                used in the messages when present.

        Returns:
            True if the item was deleted, False otherwise.
        """
        await self._ensure_authenticated()
        display_name = file_info.get('name', file_info.get('id', '<unknown>'))
        try:
            response = requests.delete(
                f"{self.graph_url}/{self.drive_endpoint}/items/{file_info['id']}",
                headers=self._get_headers(),
                timeout=self.request_timeout
            )
            response.raise_for_status()
            print(f"🗑️ Deleted from OneDrive: {display_name}")
            return True
        except Exception as e:
            print(f"❌ Failed to delete {display_name}: {e}")
            return False
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| bases | - | - |
Parameter Details
config: Dictionary containing OneDrive/Azure configuration. Required keys: 'client_id' (Azure App Registration client ID). Optional keys: 'client_secret' (for confidential client apps), 'tenant_id' (defaults to 'common'), 'redirect_uri' (defaults to 'http://localhost:8080'), 'scopes' (list of Graph API scopes, defaults to ['https://graph.microsoft.com/Files.ReadWrite.All']), 'user_principal_name' (user email for client credentials flow), 'token_cache_file' (path to token cache file, defaults to 'onedrive_token_cache.json')
Return Value
The constructor returns an initialized OneDriveClient instance. Key methods return: authenticate() returns bool (success/failure), get_drive_info() returns Optional[Dict] with drive metadata, create_folder() returns Optional[str] folder ID, list_files_in_folder() returns List[Dict] of file information, download_file() returns bool, upload_file() returns bool, delete_file() returns bool. File information dictionaries contain keys: 'id', 'name', 'size', 'modified', 'download_url', 'path'.
Class Interface
Methods
__init__(self, config: Dict[str, Any])
Purpose: Initialize the OneDrive client with configuration and set up MSAL authentication
Parameters:
config: Dictionary with client_id (required), client_secret, tenant_id, redirect_uri, scopes, user_principal_name, token_cache_file
Returns: None - initializes instance
_create_msal_app(self)
Purpose: Create and configure MSAL application instance (ConfidentialClientApplication or PublicClientApplication)
Returns: MSAL application instance with token cache loaded
_save_token_cache(self)
Purpose: Save the current token cache to disk for reuse across sessions
Returns: None - saves cache to file
async authenticate(self) -> bool
Purpose: Authenticate with Microsoft Graph API using cached token, client credentials, or device code flow
Returns: True if authentication successful, False otherwise
_get_headers(self) -> Dict[str, str]
Purpose: Generate HTTP headers with authorization token for Graph API requests
Returns: Dictionary with Authorization and Content-Type headers
async _ensure_authenticated(self)
Purpose: Check if access token is valid and refresh if expired
Returns: None - ensures valid token or re-authenticates
async get_drive_info(self) -> Optional[Dict[str, Any]]
Purpose: Retrieve information about the user's OneDrive (owner, quota, drive type)
Returns: Dictionary with drive information or None on failure
async create_folder(self, folder_path: str) -> Optional[str]
Purpose: Create a folder hierarchy in OneDrive, creating parent folders as needed
Parameters:
folder_path: Path like '/E-Ink LLM Input' or '/My Folder/Subfolder'
Returns: Folder ID of the created/existing folder, or None on failure
async _find_item_in_folder(self, folder_id: str, item_name: str, item_type: str = None) -> Optional[Dict[str, Any]]
Purpose: Search for a specific file or folder by name within a parent folder
Parameters:
folder_id: Parent folder ID to search in
item_name: Name of the item to find
item_type: Optional filter: 'folder' or 'file'
Returns: Item information dictionary or None if not found
async get_folder_id(self, folder_path: str) -> Optional[str]
Purpose: Get folder ID by path, creating the folder hierarchy if it doesn't exist
Parameters:
folder_path: Path like '/MyFolder/SubFolder'
Returns: Folder ID or None on failure
async list_files_in_folder(self, folder_path: str, file_extensions: List[str] = None) -> List[Dict[str, Any]]
Purpose: List all files in a OneDrive folder, optionally filtered by file extension
Parameters:
folder_path: Path like '/E-Ink LLM Input'
file_extensions: Optional list of extensions to filter (e.g., ['.pdf', '.jpg'])
Returns: List of file information dictionaries with id, name, size, modified, download_url, path
async download_file(self, file_info: Dict[str, Any], local_path: str) -> bool
Purpose: Download a file from OneDrive to local storage
Parameters:
file_info: File information dictionary from list_files_in_folder
local_path: Local path where file should be saved
Returns: True if download successful, False otherwise
async upload_file(self, local_path: str, onedrive_folder_path: str, filename: str = None) -> bool
Purpose: Upload a file to OneDrive, automatically handling large files with chunked upload
Parameters:
local_path: Path to local file to upload
onedrive_folder_path: OneDrive folder path like '/E-Ink LLM Output'
filename: Optional filename override (defaults to local filename)
Returns: True if upload successful, False otherwise
async _upload_large_file(self, local_file: Path, folder_id: str, filename: str) -> bool
Purpose: Upload large files (4 MB or larger) using a chunked upload session with progress tracking
Parameters:
local_file: Path object to local file
folder_id: OneDrive folder ID
filename: Name for uploaded file
Returns: True if upload successful, False otherwise
async delete_file(self, file_info: Dict[str, Any]) -> bool
Purpose: Delete a file from OneDrive
Parameters:
file_info: File information dictionary with 'id' key
Returns: True if deletion successful, False otherwise
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
| config | Dict[str, Any] | Complete configuration dictionary passed to constructor | instance |
| client_id | str | Azure App Registration client ID | instance |
| client_secret | Optional[str] | Azure App Registration client secret (for confidential clients) | instance |
| tenant_id | str | Azure tenant ID (defaults to 'common') | instance |
| redirect_uri | str | OAuth redirect URI (defaults to 'http://localhost:8080') | instance |
| scopes | List[str] | List of Microsoft Graph API scopes required | instance |
| user_principal_name | Optional[str] | User email for client credentials flow | instance |
| token_cache_file | Path | Path to token cache file for persistent authentication | instance |
| app | Union[msal.ConfidentialClientApplication, msal.PublicClientApplication] | MSAL application instance for authentication | instance |
| access_token | Optional[str] | Current OAuth access token for Graph API requests | instance |
| token_expiry | Optional[datetime] | Expiration time of current access token | instance |
| graph_url | str | Microsoft Graph API base URL (https://graph.microsoft.com/v1.0) | instance |
| drive_endpoint | str | Graph API endpoint for drive access (me/drive or users/{upn}/drive) | instance |
Dependencies
msal, requests, pathlib, typing, datetime, json, os
Required Imports
import msal
import requests
from pathlib import Path
from typing import Dict, List, Optional, Any
from datetime import datetime, timedelta
Usage Example
import asyncio


async def interactive_example():
    """Walk through the file operations using the device code flow."""
    # Interactive authentication (device code flow)
    config = {
        'client_id': 'your-azure-app-client-id',
        'scopes': ['https://graph.microsoft.com/Files.ReadWrite.All']
    }
    client = OneDriveClient(config)

    # Authenticate (will prompt for device code); stop if it fails
    if not await client.authenticate():
        return

    # Get drive information (None is returned on failure)
    drive_info = await client.get_drive_info()
    if drive_info:
        print(f"Drive owner: {drive_info['owner']['user']['displayName']}")

    # Create folder
    folder_id = await client.create_folder('/MyApp/Documents')

    # List files in folder
    files = await client.list_files_in_folder('/MyApp/Documents', file_extensions=['.pdf', '.docx'])
    for file in files:
        print(f"Found: {file['name']} ({file['size']} bytes)")

    # Download a file
    if files:
        await client.download_file(files[0], './downloads/document.pdf')

    # Upload a file
    await client.upload_file('./local/report.pdf', '/MyApp/Documents')

    # Delete a file
    if files:
        await client.delete_file(files[0])


async def service_example():
    """Authenticate as a service application (client credentials flow)."""
    # Service application example (client credentials flow)
    service_config = {
        'client_id': 'your-client-id',
        'client_secret': 'your-client-secret',
        'tenant_id': 'your-tenant-id',
        'user_principal_name': 'user@domain.com'
    }
    service_client = OneDriveClient(service_config)
    await service_client.authenticate()


# The client's methods are coroutines, so they must run inside an event loop.
asyncio.run(interactive_example())
asyncio.run(service_example())
Best Practices
- Always call authenticate() before performing any file operations
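- Most operations call _ensure_authenticated(), which authenticates when no token is held yet and re-authenticates once the recorded expiry time has passed; calling authenticate() up front is still recommended so that failures surface early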
- Token cache is automatically saved to disk for reuse across sessions
- For service applications, use client_secret and user_principal_name in config
- For interactive applications, omit client_secret to use device code flow
- Large files (4 MB or larger) are automatically uploaded using chunked upload sessions
- File operations are async and should be awaited
- The client maintains state (access_token, token_expiry) that persists across method calls
- Folder paths should use forward slashes (e.g., '/MyFolder/SubFolder')
- The client automatically creates folders if they don't exist when uploading or getting folder IDs
- Error handling prints messages but returns None/False/empty list on failure - check return values
- For client credentials flow, the configured scopes are not used; the client always requests https://graph.microsoft.com/.default
- The drive_endpoint is automatically set based on authentication method (me/drive vs users/{upn}/drive)
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
- function test_onedrive_auth 66.9% similar
- class O365Client 66.6% similar
- class OneDriveProcessor 61.1% similar
- class FileCloudClient_v1 60.6% similar
- class AzureSSO_v1 60.1% similar