class FileCloudClient_v1
A client class for interacting with FileCloud storage systems through direct API calls, providing authentication, file search, download, and metadata retrieval capabilities.
/tf/active/vicechatdev/contract_validity_analyzer/utils/filecloud_client.py
17 - 383
moderate
Purpose
FileCloudClient provides a comprehensive interface for connecting to FileCloud servers and performing file operations. It handles authentication, maintains session state, searches for documents with filtering capabilities, downloads files (to memory or temporary files), retrieves file metadata, and generates access URLs. The class is designed to work with FileCloud's REST API endpoints and supports context manager protocol for automatic connection/disconnection management.
Source Code
class FileCloudClient:
    """Client for interacting with FileCloud storage using direct API calls.

    The client logs in through ``/core/loginguest`` and reuses one
    ``requests.Session`` for every later call, so the cookies set by the
    login are sent automatically. It can be used as a context manager, in
    which case it connects on entry and disconnects on exit.

    Optional ``config`` keys read by this class (besides the required
    ``server_url``, ``username`` and ``password``):

    * ``base_path``: default search location (``'/'`` when absent).
    * ``timeout``: value forwarded as ``timeout=`` to every request;
      ``None`` (the default) keeps the previous wait-forever behaviour.
    * ``search_timeout``: maximum number of seconds to keep polling a search
      that reports ``INPROGRESS`` (defaults to ``DEFAULT_SEARCH_TIMEOUT``).
    """

    # Seconds to sleep between polls while a search is still in progress.
    SEARCH_POLL_INTERVAL = 5
    # Upper bound (seconds) for polling a search when the config sets none.
    DEFAULT_SEARCH_TIMEOUT = 600

    def __init__(self, config: Dict[str, Any]):
        """
        Initialize FileCloud client.

        Args:
            config: Configuration dictionary with server_url, username,
                password and the optional keys described on the class.
        """
        self.config = config
        self.server_url = config['server_url'].rstrip('/')
        self.username = config['username']
        self.password = config['password']
        self.base_path = config.get('base_path', '/')
        # Initialize session for persistent connections
        self.session = requests.Session()
        self.headers = {'Accept': 'application/json'}
        self.authenticated = False

    def _post(self, endpoint: str, params: Optional[Dict[str, Any]] = None,
              headers: Optional[Dict[str, str]] = None):
        """POST to ``endpoint`` on the configured server through the session.

        The session already carries the login cookies, so they are not passed
        explicitly. The optional ``timeout`` config value is applied to every
        request.
        """
        return self.session.post(
            f"{self.server_url}{endpoint}",
            params=params,
            headers=headers,
            timeout=self.config.get('timeout'),
        )

    def connect(self) -> bool:
        """
        Connect and authenticate with FileCloud server.

        Returns:
            True if successful, False otherwise
        """
        try:
            # NOTE(review): credentials travel in the query string here;
            # confirm whether the server accepts them as form data instead.
            response = self._post(
                '/core/loginguest',
                params={'userid': self.username, 'password': self.password},
                headers=self.headers,
            )
            if response.status_code != 200:
                logger.error(f"Login failed with status code: {response.status_code}")
                return False

            try:
                command = response.json().get('command', [{}])[0]
            except (ValueError, AttributeError, IndexError, KeyError, TypeError):
                # Not JSON (or not the expected JSON shape): fall back to
                # looking for a success marker in the raw body. An HTTP 200
                # alone is NOT treated as a successful login.
                if 'success' in response.text.lower():
                    self.authenticated = True
                    logger.info("Successfully connected to FileCloud server")
                    return True
                logger.error("Login failed - invalid response format")
                return False

            if command.get('result') == 1:
                self.authenticated = True
                logger.info("Successfully connected to FileCloud server")
                return True
            logger.error(f"Login failed: {command.get('message', 'Unknown error')}")
            return False
        except Exception as e:
            logger.error(f"Error connecting to FileCloud: {e}")
            return False

    def _ensure_authenticated(self):
        """Ensure the client is authenticated, connecting if necessary.

        Raises:
            ConnectionError: If the login attempt fails.
        """
        if not self.authenticated and not self.connect():
            raise ConnectionError("Failed to connect to FileCloud server")

    @staticmethod
    def _parse_search_entry(entry) -> Optional[Dict[str, Any]]:
        """Convert one ``<entry>`` element of a search reply into a dict.

        Returns None when the entry has no path or the path has no file name.
        """
        # Later duplicates of a tag overwrite earlier ones.
        fields = {child.tag: child.text for child in entry}
        file_path = fields.get('path')
        file_name = os.path.basename(file_path) if file_path else ''
        if not file_path or not file_name:
            return None
        try:
            file_size = int(fields.get('size') or 0)
        except (TypeError, ValueError):
            # A non-numeric size is reported as 0 rather than dropping the file.
            file_size = 0
        return {
            'filename': file_name,
            'path': os.path.dirname(file_path),
            'full_path': file_path,
            'size': file_size,
            'created_date': fields.get('createdate'),
            'modified_date': fields.get('modifieddate'),
            'type': 'file',
        }

    def search_documents(self, path: Optional[str] = None,
                         extensions: Optional[List[str]] = None) -> List[Dict[str, Any]]:
        """
        Search for documents in the specified path using FileCloud search API.

        Args:
            path: Path to search in (defaults to base_path)
            extensions: List of file extensions to filter by
                (defaults to .pdf, .doc and .docx; matched case-insensitively)

        Returns:
            List of document information dictionaries, de-duplicated by
            full path. Empty on any error or if the search times out.

        Raises:
            ConnectionError: If the client cannot authenticate.
        """
        self._ensure_authenticated()
        search_path = path or self.base_path
        extensions = extensions or ['.pdf', '.doc', '.docx']
        suffixes = tuple(ext.lower() for ext in extensions)
        try:
            logger.info(f"Searching for documents in: {search_path}")
            # Use FileCloud search API to find all files
            search_endpoint = '/core/dosearch'
            params = {
                'searchstring': '**',  # Search for all files
                'searchloc': search_path,
                'searchresulttype': 'file',
                'limit': '10000',
                'maxsearchentries': '10000',
                'refresh': 1,
            }
            response = self._post(search_endpoint, params=params)

            # Wait for search to complete if it's in progress, but never
            # forever: give up once the configured deadline has passed.
            max_wait = self.config.get('search_timeout', self.DEFAULT_SEARCH_TIMEOUT)
            deadline = time.monotonic() + max_wait
            while 'INPROGRESS' in response.text:
                if time.monotonic() >= deadline:
                    logger.error(f"Search did not complete within {max_wait} seconds")
                    return []
                logger.info("Search is in progress, waiting for completion...")
                time.sleep(self.SEARCH_POLL_INTERVAL)
                # Follow-up polls ask for the running search instead of
                # starting a new one.
                params['refresh'] = 0
                response = self._post(search_endpoint, params=params)

            documents = []
            if response.status_code == 200:
                try:
                    # Parse XML response
                    root = ET.fromstring(response.text)
                    for entry in root:
                        if entry.tag != 'entry':
                            continue
                        doc = self._parse_search_entry(entry)
                        # Filter by extension
                        if doc and doc['filename'].lower().endswith(suffixes):
                            documents.append(doc)
                except ET.ParseError as e:
                    logger.error(f"Error parsing XML response: {e}")
                    logger.debug(f"Response text: {response.text[:1000]}")
                except Exception as e:
                    logger.error(f"Error processing search results: {e}")
            else:
                logger.error(f"Search failed with status code: {response.status_code}")

            # Remove duplicates based on full_path (first occurrence wins)
            unique_by_path: Dict[str, Dict[str, Any]] = {}
            for doc in documents:
                unique_by_path.setdefault(doc['full_path'], doc)
            unique_documents = list(unique_by_path.values())
            logger.info(f"Found {len(unique_documents)} unique documents")
            return unique_documents
        except Exception as e:
            logger.error(f"Error searching for documents: {e}")
            return []

    def download_document(self, file_path: str) -> Optional[bytes]:
        """
        Download a document from FileCloud.

        Args:
            file_path: Full path to the file in FileCloud

        Returns:
            File content as bytes, or None if failed

        Raises:
            ConnectionError: If the client cannot authenticate.
        """
        self._ensure_authenticated()
        try:
            logger.debug(f"Downloading document: {file_path}")
            # Use direct FileCloud API call for download
            response = self._post(
                '/core/downloadfile',
                params={
                    'filepath': file_path,
                    'filename': os.path.basename(file_path),
                },
            )
            if response.status_code == 200:
                logger.debug(f"Successfully downloaded {len(response.content)} bytes")
                return response.content
            logger.error(f"Failed to download file: status code {response.status_code}")
            return None
        except Exception as e:
            logger.error(f"Error downloading document {file_path}: {e}")
            return None

    def download_to_temp_file(self, file_path: str) -> Optional[str]:
        """
        Download a document to a temporary file.

        The temporary file keeps the extension of ``file_path`` and is NOT
        deleted automatically; the caller must remove it when done.

        Args:
            file_path: Full path to the file in FileCloud

        Returns:
            Path to temporary file, or None if failed
        """
        content = self.download_document(file_path)
        if content is None:
            return None
        temp_path = None
        try:
            # Get file extension
            _, ext = os.path.splitext(file_path)
            # Create temporary file
            with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_file:
                temp_path = temp_file.name
                temp_file.write(content)
            logger.debug(f"Downloaded to temporary file: {temp_path}")
            return temp_path
        except Exception as e:
            logger.error(f"Error creating temporary file: {e}")
            # Do not leave a partially written file behind.
            if temp_path and os.path.exists(temp_path):
                try:
                    os.remove(temp_path)
                except OSError:
                    pass
            return None

    def get_file_info(self, file_path: str) -> Optional[Dict[str, Any]]:
        """
        Get file metadata from FileCloud using the fileinfo API.

        Args:
            file_path: Full path to the file in FileCloud

        Returns:
            Dictionary with file metadata or None if failed

        Raises:
            ConnectionError: If the client cannot authenticate.
        """
        self._ensure_authenticated()
        try:
            logger.debug(f"Getting file info for: {file_path}")
            response = self._post('/core/fileinfo', params={'file': file_path})
            if response.status_code != 200:
                logger.error(f"Failed to get file info: {response.status_code}")
                return None

            # Parse XML response using xmltodict (same as filecloud_wuxi_sync.py).
            # Imported here so the dependency is only needed by this method.
            import xmltodict
            doc = xmltodict.parse(response.text)
            # Check if fileinfo exists and has entry
            fileinfo = doc.get('fileinfo')
            if not fileinfo:
                logger.warning(f"File not found in FileCloud: {file_path}")
                return None
            entry = fileinfo.get('entry')
            if not entry:
                logger.warning(f"No entry found in fileinfo for {file_path}")
                return None
            logger.debug(f"Retrieved file info: {entry}")
            return entry
        except Exception as e:
            # logger.exception records the traceback through the logging
            # system instead of printing it to stderr.
            logger.exception(f"Error getting file info for {file_path}: {e}")
            return None

    def check_file_exists(self, file_path: str) -> bool:
        """
        Check if a file exists in FileCloud.

        Args:
            file_path: Full path to the file

        Returns:
            True if get_file_info() returned metadata, False otherwise
            (including when the lookup itself failed)
        """
        return self.get_file_info(file_path) is not None

    def disconnect(self):
        """Disconnect from FileCloud server and close the HTTP session."""
        try:
            if self.authenticated and self.session:
                # FileCloud logout endpoint
                self._post('/core/logout')
                logger.info("Disconnected from FileCloud server")
        except Exception as e:
            logger.error(f"Error disconnecting from FileCloud: {e}")
        finally:
            # Always drop the authenticated flag, even when logout failed, so
            # the next call re-authenticates instead of trusting a dead login.
            self.authenticated = False
            if self.session:
                self.session.close()

    def get_document_url(self, file_path: str) -> str:
        """
        Generate a direct URL to view/download a document in FileCloud.

        Args:
            file_path: Full path to the file in FileCloud

        Returns:
            URL string to access the document
        """
        # FileCloud web interface URL format
        # Format: https://server/ui/core/index.html?filter=filename#expl-tabl./encoded/path
        import urllib.parse

        # Split into directory path (without filename) and filename.
        dir_path, _, filename = file_path.rpartition('/')
        # Percent-encode the filename so spaces, '&', '#', etc. cannot
        # terminate or corrupt the query string.
        encoded_name = urllib.parse.quote(filename, safe='')
        # Encode the directory path for the hash (spaces become +, special chars encoded)
        encoded_dir = urllib.parse.quote(dir_path, safe='/').replace('%20', '+')
        # Construct the URL
        return f"{self.server_url}/ui/core/index.html?filter={encoded_name}#expl-tabl.{encoded_dir}"

    def __enter__(self):
        """Context manager entry: attempt to connect and return the client."""
        self.connect()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: always disconnect."""
        self.disconnect()
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| bases | - | - | |
Parameter Details
config: Dictionary containing FileCloud server configuration. Required keys: 'server_url' (base URL of FileCloud server), 'username' (login username), 'password' (login password). Optional keys: 'base_path' (default search path, defaults to '/'). The server_url will have trailing slashes automatically removed.
Return Value
Instantiation returns a FileCloudClient object. Key method returns: connect() returns bool (True if authenticated successfully), search_documents() returns List[Dict] with file metadata (filename, path, full_path, size, created_date, modified_date, type), download_document() returns Optional[bytes] (file content or None), download_to_temp_file() returns Optional[str] (temp file path or None), get_file_info() returns Optional[Dict] (file metadata or None), check_file_exists() returns bool, get_document_url() returns str (web interface URL).
Class Interface
Methods
__init__(self, config: Dict[str, Any])
Purpose: Initialize the FileCloud client with server configuration and set up session management
Parameters:
config: Dictionary with 'server_url', 'username', 'password', and optional 'base_path'
Returns: None - initializes instance attributes
connect(self) -> bool
Purpose: Authenticate with the FileCloud server and establish a session
Returns: True if authentication successful, False otherwise. Sets self.authenticated flag.
_ensure_authenticated(self)
Purpose: Internal method to verify authentication state and connect if needed
Returns: None - raises Exception if connection fails
search_documents(self, path: str = None, extensions: List[str] = None) -> List[Dict[str, Any]]
Purpose: Search for documents in FileCloud with optional path and extension filtering
Parameters:
path: Directory path to search in (defaults to self.base_path if None)
extensions: List of file extensions to filter by (defaults to ['.pdf', '.doc', '.docx'])
Returns: List of dictionaries containing file metadata: filename, path, full_path, size, created_date, modified_date, type. Returns empty list on error.
download_document(self, file_path: str) -> Optional[bytes]
Purpose: Download a file from FileCloud and return its content as bytes
Parameters:
file_path: Full path to the file in FileCloud (e.g., '/Documents/report.pdf')
Returns: File content as bytes if successful, None if download fails
download_to_temp_file(self, file_path: str) -> Optional[str]
Purpose: Download a file to a temporary file on disk and return the path
Parameters:
file_path: Full path to the file in FileCloud
Returns: Path to the temporary file as string if successful, None if failed. Caller must delete the temp file.
get_file_info(self, file_path: str) -> Optional[Dict[str, Any]]
Purpose: Retrieve metadata for a specific file using FileCloud's fileinfo API
Parameters:
file_path: Full path to the file in FileCloud
Returns: Dictionary with file metadata (parsed from XML response) if file exists, None if file not found or error occurs
check_file_exists(self, file_path: str) -> bool
Purpose: Check whether a file exists in FileCloud
Parameters:
file_path: Full path to the file to check
Returns: True if file exists, False otherwise
disconnect(self)
Purpose: Log out from FileCloud server and close the session
Returns: None - sets self.authenticated to False and closes session
get_document_url(self, file_path: str) -> str
Purpose: Generate a web interface URL for viewing/accessing a document in FileCloud
Parameters:
file_path: Full path to the file in FileCloud
Returns: URL string to access the document through FileCloud web interface
__enter__(self)
Purpose: Context manager entry - connects to FileCloud
Returns: self (the FileCloudClient instance)
__exit__(self, exc_type, exc_val, exc_tb)
Purpose: Context manager exit - disconnects from FileCloud
Parameters:
exc_type: Exception type if exception occurred
exc_val: Exception value if exception occurred
exc_tb: Exception traceback if exception occurred
Returns: None - always disconnects regardless of exceptions
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
| config | Dict[str, Any] | Original configuration dictionary passed to __init__ | instance |
| server_url | str | FileCloud server base URL with trailing slashes removed | instance |
| username | str | FileCloud username for authentication | instance |
| password | str | FileCloud password for authentication | instance |
| base_path | str | Default base path for file operations (defaults to '/') | instance |
| session | requests.Session | Persistent HTTP session object for maintaining cookies and connections | instance |
| headers | Dict[str, str] | Default HTTP headers for API requests (includes 'Accept: application/json') | instance |
| authenticated | bool | Flag indicating whether the client is currently authenticated with FileCloud | instance |
Dependencies
logging, os, sys, requests, xml.etree.ElementTree, time, typing, tempfile, pathlib, urllib.parse, xmltodict, traceback
Required Imports
import logging
import os
import requests
import xml.etree.ElementTree as ET
import time
from typing import Dict, List, Any, Optional
import tempfile
import urllib.parse
Conditional/Optional Imports
These imports are only needed under specific conditions:
import xmltodict
Condition: only when calling get_file_info() method to parse XML responses
Required (conditional)
import traceback
Condition: only for error logging in get_file_info() exception handling
Optional
Usage Example
# Basic usage
import os

config = {
    'server_url': 'https://filecloud.example.com',
    'username': 'myuser',
    'password': 'mypassword',
    'base_path': '/Documents'
}
client = FileCloudClient(config)
if client.connect():
    try:
        # Search for PDF documents
        docs = client.search_documents(extensions=['.pdf'])
        for doc in docs:
            print(f"Found: {doc['filename']} at {doc['full_path']}")
        # Download a specific file
        if docs:
            content = client.download_document(docs[0]['full_path'])
            if content:
                print(f"Downloaded {len(content)} bytes")
            # Or download to temp file
            temp_path = client.download_to_temp_file(docs[0]['full_path'])
            if temp_path:
                print(f"Saved to: {temp_path}")
                # The temp file is not deleted automatically; remove it when done
                os.remove(temp_path)
        # Check if file exists
        exists = client.check_file_exists('/Documents/report.pdf')
        # Get file metadata
        info = client.get_file_info('/Documents/report.pdf')
        # Get web URL
        url = client.get_document_url('/Documents/report.pdf')
    finally:
        # Log out even if one of the calls above raised
        client.disconnect()

# Using context manager (recommended)
with FileCloudClient(config) as client:
    docs = client.search_documents()
    for doc in docs:
        content = client.download_document(doc['full_path'])
Best Practices
- Always call connect() before using other methods, or use the context manager protocol (with statement) for automatic connection/disconnection
- The client maintains authentication state - check the 'authenticated' attribute or rely on _ensure_authenticated() which is called by most methods
- Use disconnect() to properly close the session when done, or use context manager to handle this automatically
- The session object maintains cookies for authentication across requests - do not create multiple instances for the same server
- search_documents() can return large result sets - consider the limit parameter implications (currently hardcoded to 10000)
- download_to_temp_file() creates files that are not automatically deleted - caller is responsible for cleanup
- File paths in FileCloud should use forward slashes and typically start with '/'
- The class handles XML and JSON responses from FileCloud API - some endpoints return different formats
- Search operations may take time for large directories - the method polls for completion automatically
- Error handling returns None or False rather than raising exceptions in most cases - always check return values
- The get_document_url() method generates web interface URLs, not direct download URLs
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
- class FileCloudClient 90.8% similar
- class FileCloudAPI_v1 71.8% similar
- class FileCloudAPI 70.6% similar
- function get_filecloud_client 69.5% similar
- function test_filecloud_connection_v1 66.3% similar