class ClinicalTrialsClient
A client class for searching and retrieving clinical trial data from multiple international sources including ClinicalTrials.gov, EU Clinical Trials Register, WHO ICTRP, and OpenTrials.
/tf/active/vicechatdev/QA_updater/data_access/clinical_trials_client.py
12 - 432
complex
Purpose
This class provides a unified interface for querying clinical trial databases across multiple international registries. It handles API authentication, rate limiting, data retrieval, and response parsing for each source. The client manages credentials, enforces rate limits to comply with API usage policies, and normalizes data from different sources into consistent dictionary structures. It's designed for researchers and applications that need to aggregate clinical trial information from multiple authoritative sources.
Source Code
class ClinicalTrialsClient:
    """Client for accessing clinical trial data from multiple sources.

    Each ``search_*`` method queries one registry, waits as needed to respect
    that registry's configured request rate, and returns a list of plain
    dictionaries. Every dictionary carries a ``"source"`` key; the remaining
    keys are source-specific. Request and parsing failures are logged and
    reported as an empty list rather than raised.
    """

    # Upper bound (seconds) for a single HTTP request. Without a timeout a
    # stalled server would block the caller indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self, config: ConfigParser):
        """Initialize the client with required credentials.

        Args:
            config: Parsed configuration. Credentials are read from the
                ``api_keys`` section and requests-per-second limits from the
                ``rate_limits`` section; every option is optional.
        """
        self.logger = logging.getLogger(__name__)
        self.config = config

        # API keys
        self.eu_clinical_trials_api_key = self.config.get('api_keys', 'eu_clinical_trials_api_key', fallback=None)
        self.who_ictrp_username = self.config.get('api_keys', 'who_ictrp_username', fallback=None)
        self.who_ictrp_password = self.config.get('api_keys', 'who_ictrp_password', fallback=None)

        # Rate limiting configurations (requests per second)
        self.clinicaltrials_rate_limit = float(self.config.get('rate_limits', 'clinicaltrials_rate_limit', fallback=3))
        self.eu_clinical_trials_rate_limit = float(self.config.get('rate_limits', 'eu_clinical_trials_rate_limit', fallback=1))
        self.who_ictrp_rate_limit = float(self.config.get('rate_limits', 'who_ictrp_rate_limit', fallback=0.2))
        self.opentrials_rate_limit = float(self.config.get('rate_limits', 'opentrials_rate_limit', fallback=2))

        # Timestamps (time.time()) of the most recent request per source
        self.last_clinicaltrials_request = 0
        self.last_eu_trials_request = 0
        self.last_who_ictrp_request = 0
        self.last_opentrials_request = 0

        self.logger.info("ClinicalTrialsClient initialized.")

    @staticmethod
    def _throttle(last_request: float, rate_limit: float) -> None:
        """Sleep until at least ``1 / rate_limit`` seconds have passed since ``last_request``.

        A non-positive ``rate_limit`` disables throttling instead of raising
        ``ZeroDivisionError``.
        """
        if rate_limit <= 0:
            return
        min_interval = 1.0 / rate_limit
        elapsed = time.time() - last_request
        if elapsed < min_interval:
            time.sleep(min_interval - elapsed)

    def search_clinicaltrials_gov(self, query: str, max_results: int = 10, days_back: int = 90) -> List[Dict[str, Any]]:
        """
        Search ClinicalTrials.gov for trials matching the query.

        Args:
            query: Search terms
            max_results: Maximum number of results to return
            days_back: Only include results from this many days ago

        Returns:
            List of trial metadata dictionaries (empty on any error)
        """
        self._throttle(self.last_clinicaltrials_request, self.clinicaltrials_rate_limit)

        # Calculate date range for filtering
        date_from = (datetime.now() - timedelta(days=days_back)).strftime("%m/%d/%Y")

        # ClinicalTrials.gov API endpoint
        # NOTE(review): this is the classic study_fields endpoint; confirm it is
        # still served, since ClinicalTrials.gov also publishes a v2 API.
        url = "https://clinicaltrials.gov/api/query/study_fields"

        params = {
            "expr": query,
            "fields": ",".join([
                "NCTId", "BriefTitle", "OfficialTitle", "BriefSummary",
                "DetailedDescription", "OverallStatus", "StartDate",
                "PrimaryCompletionDate", "CompletionDate", "Phase",
                "StudyType", "Condition", "Intervention", "Sponsor",
                "LastUpdatePostDate", "LeadSponsorName",
                "ResponsiblePartyType"
            ]),
            "min_rnk": 1,
            "max_rnk": max_results,
            "fmt": "json",
            "filter.lastUpdatePostDate": f"{date_from},",  # From specified date to present
        }

        try:
            response = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            data = response.json()
        except requests.exceptions.RequestException as e:
            self.logger.error(f"ClinicalTrials.gov API error: {e}")
            return []
        except Exception as e:
            self.logger.exception(f"Error during ClinicalTrials.gov API request: {e}")
            return []
        finally:
            # Record the attempt even when it failed so that repeated failures
            # are still rate limited.
            self.last_clinicaltrials_request = time.time()

        return self._parse_clinicaltrials_gov(data)

    @staticmethod
    def _parse_clinicaltrials_gov(data: Any) -> List[Dict[str, Any]]:
        """Convert a ``StudyFieldsResponse`` payload into trial dictionaries.

        Each entry of ``StudyFields`` is looked up by field name. A positional
        ``Field`` list is honoured as a fallback when present (presumably an
        alternative layout; the lookup is bounds-checked so a missing entry
        can no longer raise ``IndexError``). Empty values are omitted.
        """
        payload = data.get("StudyFieldsResponse") if isinstance(data, dict) else None
        if not isinstance(payload, dict):
            return []

        field_names = payload.get("FieldList") or []
        trials = []
        for study in payload.get("StudyFields") or []:
            if not isinstance(study, dict):
                continue
            positional = study.get("Field") or []
            trial = {"source": "clinicaltrials.gov"}
            for index, field in enumerate(field_names):
                if field in study:
                    value = study[field]
                elif index < len(positional):
                    value = positional[index]
                else:
                    continue
                if value:  # Only add non-empty values / non-empty lists
                    trial[field] = value
            trials.append(trial)
        return trials

    def search_eu_clinical_trials(self, query: str, max_results: int = 10, days_back: int = 90) -> List[Dict[str, Any]]:
        """
        Search EU Clinical Trials Register for trials matching the query.

        Args:
            query: Search terms
            max_results: Maximum number of results to return
            days_back: Only include results from this many days ago

        Returns:
            List of trial metadata dictionaries (empty on any error)
        """
        self._throttle(self.last_eu_trials_request, self.eu_clinical_trials_rate_limit)

        # Calculate date for filtering
        date_from = (datetime.now() - timedelta(days=days_back)).strftime("%d/%m/%Y")

        # EU Clinical Trials Register search endpoint
        # Note: The EU Clinical Trials Register doesn't have a public API
        # This implementation uses the search form interface
        url = "https://www.clinicaltrialsregister.eu/ctr-search/rest/download/full"

        params = {
            "query": query,
            "dateFrom": date_from,
            "maxResults": max_results
        }

        headers = {
            "User-Agent": "Mozilla/5.0 (compatible; Research/1.0)",
            "Accept": "text/xml, application/xml"
        }
        if self.eu_clinical_trials_api_key:
            headers["Authorization"] = f"Bearer {self.eu_clinical_trials_api_key}"

        try:
            response = requests.post(url, data=params, headers=headers, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            self.logger.error(f"EU Clinical Trials Register API error: {e}")
            return []
        except Exception as e:
            self.logger.exception(f"Error during EU Clinical Trials Register API request: {e}")
            return []
        finally:
            # Record the attempt even when it failed (see rate limiting).
            self.last_eu_trials_request = time.time()

        # Parse XML response
        try:
            root = ET.fromstring(response.text)
        except ET.ParseError as e:
            self.logger.error(f"EU Clinical Trials Register XML parsing error: {e}")
            return []
        except Exception as e:
            self.logger.exception(f"Error during EU Clinical Trials Register XML parsing: {e}")
            return []

        return self._parse_eu_trials(root)

    def _parse_eu_trials(self, root) -> List[Dict[str, Any]]:
        """Build one dictionary per ``<trial>`` element found under ``root``."""
        trials = []
        for trial in root.findall(".//trial"):
            trial_data = {
                "source": "eu_clinical_trials",
                "eudract_number": self._safe_get_text(trial, ".//eudract_number"),
                "title": self._safe_get_text(trial, ".//title"),
                "start_date": self._safe_get_text(trial, ".//start_date"),
                "sponsor_name": self._safe_get_text(trial, ".//sponsor_name"),
                "status": self._safe_get_text(trial, ".//trial_status"),
                "therapeutic_area": self._safe_get_text(trial, ".//therapeutic_area"),
                "population_age": self._safe_get_text(trial, ".//population_age"),
                "gender": self._safe_get_text(trial, ".//gender"),
                "trial_results": self._safe_get_text(trial, ".//trial_results_url"),
            }

            # Multi-valued fields are only added when the elements exist;
            # elements without text are dropped.
            conditions = trial.findall(".//condition")
            if conditions:
                trial_data["conditions"] = [condition.text for condition in conditions if condition.text]

            countries = trial.findall(".//country")
            if countries:
                trial_data["countries"] = [country.text for country in countries if country.text]

            trials.append(trial_data)
        return trials

    def _safe_get_text(self, element, xpath):
        """Return the text of the first match for ``xpath`` under ``element``, or ``""``."""
        found = element.find(xpath)
        if found is None or found.text is None:
            return ""
        return found.text

    def search_who_ictrp(self, query: str, max_results: int = 10, days_back: int = 90) -> List[Dict[str, Any]]:
        """
        Search WHO ICTRP for trials matching the query.

        Args:
            query: Search terms
            max_results: Maximum number of results to return
            days_back: Only include results from this many days ago

        Returns:
            List of trial metadata dictionaries (empty on any error)
        """
        self._throttle(self.last_who_ictrp_request, self.who_ictrp_rate_limit)

        # Calculate date range for filtering
        date_from = (datetime.now() - timedelta(days=days_back)).strftime("%Y-%m-%d")

        # WHO ICTRP API endpoint
        # Note: WHO ICTRP doesn't have a public API, but provides data exports for registered users
        url = "https://apps.who.int/trialsearch/api/trials/search"

        # Basic authentication is only sent when both credentials are configured
        auth = None
        if self.who_ictrp_username and self.who_ictrp_password:
            auth = (self.who_ictrp_username, self.who_ictrp_password)

        params = {
            "search": query,
            "date_from": date_from,
            "count": max_results,
            "format": "json"
        }

        try:
            response = requests.get(url, params=params, auth=auth, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            data = response.json()
        except requests.exceptions.RequestException as e:
            self.logger.error(f"WHO ICTRP API error: {e}")
            return []
        except (ValueError, KeyError) as e:
            self.logger.error(f"WHO ICTRP data parsing error: {e}")
            return []
        except Exception as e:
            self.logger.exception(f"Error during WHO ICTRP API request: {e}")
            return []
        finally:
            # Record the attempt even when it failed (see rate limiting).
            self.last_who_ictrp_request = time.time()

        return self._parse_who_ictrp(data)

    @staticmethod
    def _parse_who_ictrp(data: Any) -> List[Dict[str, Any]]:
        """Map the ``trials`` entries of a WHO ICTRP payload to trial dictionaries."""
        if not isinstance(data, dict):
            return []

        trials = []
        for trial in data.get("trials", []):
            trial_data = {
                "source": "who_ictrp",
                "trial_id": trial.get("trial_id"),
                "title": trial.get("public_title"),
                "scientific_title": trial.get("scientific_title"),
                "primary_sponsor": trial.get("primary_sponsor"),
                "recruitment_status": trial.get("recruitment_status"),
                "date_registration": trial.get("date_registration"),
                "date_enrollment": trial.get("date_enrollment"),
                "target_size": trial.get("target_size"),
                "study_type": trial.get("study_type"),
                "primary_outcome": trial.get("primary_outcome"),
                "source_register": trial.get("source_register"),
                "web_address": trial.get("web_address"),
            }

            # Optional collections are copied through only when non-empty
            for key in ("conditions", "interventions"):
                if trial.get(key):
                    trial_data[key] = trial[key]

            trials.append(trial_data)
        return trials

    def search_opentrials(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
        """
        Search OpenTrials for trials matching the query.

        Args:
            query: Search terms
            max_results: Maximum number of results to return

        Returns:
            List of trial metadata dictionaries (empty on any error)
        """
        self._throttle(self.last_opentrials_request, self.opentrials_rate_limit)

        # OpenTrials API endpoint
        url = "https://api.opentrials.net/v1/search"

        params = {
            "q": query,
            "page": 1,
            "per_page": max_results,
            "type": "trial"
        }

        # No API key is needed, so no headers are sent
        headers = {}

        try:
            response = requests.get(url, params=params, headers=headers, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            data = response.json()
        except requests.exceptions.RequestException as e:
            self.logger.error(f"OpenTrials API error: {e}")
            return []
        except (ValueError, KeyError) as e:
            self.logger.error(f"OpenTrials data parsing error: {e}")
            return []
        except Exception as e:
            self.logger.exception(f"Error during OpenTrials API request: {e}")
            return []
        finally:
            # Record the attempt even when it failed (see rate limiting).
            self.last_opentrials_request = time.time()

        return self._parse_opentrials(data)

    @staticmethod
    def _parse_opentrials(data: Any) -> List[Dict[str, Any]]:
        """Map the ``items`` entries of an OpenTrials payload to trial dictionaries."""
        if not isinstance(data, dict):
            return []

        # (key in the API item, key in the output, attribute taken from each entry)
        nested_fields = (
            ("conditions", "conditions", "name"),
            ("interventions", "interventions", "name"),
            ("locations", "locations", "name"),
            ("organisations", "organisations", "name"),
            ("sources", "source_links", "url"),
        )

        trials = []
        for item in data.get("items", []):
            trial_data = {
                "source": "opentrials",
                "id": item.get("id"),
                "title": item.get("public_title"),
                "brief_summary": item.get("brief_summary"),
                "registration_date": item.get("registration_date"),
                "status": item.get("status"),
                "recruitment_status": item.get("recruitment_status"),
                "target_sample_size": item.get("target_sample_size"),
                "gender": item.get("gender"),
                "has_published_results": item.get("has_published_results"),
                "identifiers": item.get("identifiers", {}),
            }

            # Flatten nested objects to a list of one attribute, when present
            for item_key, out_key, attr in nested_fields:
                entries = item.get(item_key)
                if entries:
                    trial_data[out_key] = [entry.get(attr) for entry in entries]

            trials.append(trial_data)
        return trials

    def search_all(self, query: str, max_results_per_source: int = 5, days_back: int = 90) -> List[Dict[str, Any]]:
        """
        Search all configured clinical trial sources.

        Sources are queried one after another; a failure in one source is
        logged and does not prevent the remaining sources from being queried.

        Args:
            query: Search terms
            max_results_per_source: Maximum results to return per source
            days_back: Only include results from this many days ago
                (not passed to OpenTrials, whose search takes no date filter)

        Returns:
            Combined list of results from all sources
        """
        sources = (
            ("ClinicalTrials.gov",
             lambda: self.search_clinicaltrials_gov(query, max_results_per_source, days_back)),
            ("EU Clinical Trials Register",
             lambda: self.search_eu_clinical_trials(query, max_results_per_source, days_back)),
            ("WHO ICTRP",
             lambda: self.search_who_ictrp(query, max_results_per_source, days_back)),
            ("OpenTrials",
             lambda: self.search_opentrials(query, max_results_per_source)),
        )

        results = []
        for label, run_search in sources:
            try:
                found = run_search()
                results.extend(found)
                self.logger.info(f"Retrieved {len(found)} results from {label}")
            except Exception as e:
                self.logger.error(f"{label} search error: {e}")
        return results
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
bases |
- | - |
Parameter Details
config: A ConfigParser object containing configuration settings including API keys (eu_clinical_trials_api_key, who_ictrp_username, who_ictrp_password) and rate limits (clinicaltrials_rate_limit, eu_clinical_trials_rate_limit, who_ictrp_rate_limit, opentrials_rate_limit) for each clinical trial data source. All API keys are optional and will fallback to None if not provided.
Return Value
The constructor returns a ClinicalTrialsClient instance. Search methods return List[Dict[str, Any]] containing trial metadata dictionaries. Each dictionary includes a 'source' field identifying the data source, plus source-specific fields like trial IDs, titles, descriptions, status, dates, sponsors, conditions, and interventions. Empty lists are returned on API errors or when no results are found.
Class Interface
Methods
__init__(self, config: ConfigParser)
Purpose: Initialize the client with configuration including API credentials and rate limits
Parameters:
config: ConfigParser object containing api_keys and rate_limits sections
Returns: None - initializes the ClinicalTrialsClient instance
search_clinicaltrials_gov(self, query: str, max_results: int = 10, days_back: int = 90) -> List[Dict[str, Any]]
Purpose: Search ClinicalTrials.gov for trials matching the query with date filtering
Parameters:
query: Search terms to query the ClinicalTrials.gov database
max_results: Maximum number of results to return (default: 10)
days_back: Only include results updated within this many days (default: 90)
Returns: List of dictionaries containing trial metadata including NCTId, BriefTitle, OfficialTitle, BriefSummary, DetailedDescription, OverallStatus, StartDate, Phase, StudyType, Condition, Intervention, Sponsor, and other fields. Returns empty list on error.
search_eu_clinical_trials(self, query: str, max_results: int = 10, days_back: int = 90) -> List[Dict[str, Any]]
Purpose: Search EU Clinical Trials Register for trials matching the query
Parameters:
query: Search terms to query the EU Clinical Trials Register
max_results: Maximum number of results to return (default: 10)
days_back: Only include results from this many days ago (default: 90)
Returns: List of dictionaries containing trial metadata including eudract_number, title, start_date, sponsor_name, status, therapeutic_area, population_age, gender, trial_results, conditions, and countries. Returns empty list on error.
_safe_get_text(self, element, xpath)
Purpose: Safely extract text from an XML element using XPath, returning empty string if not found
Parameters:
element: XML element to search within
xpath: XPath expression to locate the target element
Returns: Text content of the found element, or empty string if element not found or has no text
search_who_ictrp(self, query: str, max_results: int = 10, days_back: int = 90) -> List[Dict[str, Any]]
Purpose: Search WHO International Clinical Trials Registry Platform for trials matching the query
Parameters:
query: Search terms to query the WHO ICTRP database
max_results: Maximum number of results to return (default: 10)
days_back: Only include results from this many days ago (default: 90)
Returns: List of dictionaries containing trial metadata including trial_id, title, scientific_title, primary_sponsor, recruitment_status, date_registration, date_enrollment, target_size, study_type, primary_outcome, source_register, web_address, conditions, and interventions. Returns empty list on error.
search_opentrials(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]
Purpose: Search OpenTrials database for trials matching the query
Parameters:
query: Search terms to query the OpenTrials database
max_results: Maximum number of results to return (default: 10)
Returns: List of dictionaries containing trial metadata including id, title, brief_summary, registration_date, status, recruitment_status, target_sample_size, gender, has_published_results, identifiers, conditions, interventions, locations, organisations, and source_links. Returns empty list on error.
search_all(self, query: str, max_results_per_source: int = 5, days_back: int = 90) -> List[Dict[str, Any]]
Purpose: Search all configured clinical trial sources and combine results into a single list
Parameters:
query: Search terms to query all clinical trial databases
max_results_per_source: Maximum results to return from each source (default: 5)
days_back: Only include results from this many days ago for sources that support date filtering (default: 90)
Returns: Combined list of trial metadata dictionaries from all sources. Each dictionary includes a 'source' field identifying its origin. Returns partial results if some sources fail.
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
logger |
logging.Logger | Logger instance for recording client operations, errors, and API interactions | instance |
config |
ConfigParser | Configuration object containing API keys and rate limit settings | instance |
eu_clinical_trials_api_key |
Optional[str] | API key for EU Clinical Trials Register (optional, defaults to None) | instance |
who_ictrp_username |
Optional[str] | Username for WHO ICTRP authentication (optional, defaults to None) | instance |
who_ictrp_password |
Optional[str] | Password for WHO ICTRP authentication (optional, defaults to None) | instance |
clinicaltrials_rate_limit |
float | Maximum requests per second for ClinicalTrials.gov (default: 3.0) | instance |
eu_clinical_trials_rate_limit |
float | Maximum requests per second for EU Clinical Trials Register (default: 1.0) | instance |
who_ictrp_rate_limit |
float | Maximum requests per second for WHO ICTRP (default: 0.2) | instance |
opentrials_rate_limit |
float | Maximum requests per second for OpenTrials (default: 2.0) | instance |
last_clinicaltrials_request |
float | Timestamp of the last request to ClinicalTrials.gov for rate limiting (initialized to 0) | instance |
last_eu_trials_request |
float | Timestamp of the last request to EU Clinical Trials Register for rate limiting (initialized to 0) | instance |
last_who_ictrp_request |
float | Timestamp of the last request to WHO ICTRP for rate limiting (initialized to 0) | instance |
last_opentrials_request |
float | Timestamp of the last request to OpenTrials for rate limiting (initialized to 0) | instance |
Dependencies
time, requests, pandas, xml.etree.ElementTree, typing, datetime, bs4, configparser, logging
Required Imports
import time
import requests
import pandas as pd
import xml.etree.ElementTree as ET
from typing import List, Dict, Any, Optional
from datetime import datetime, timedelta
from bs4 import BeautifulSoup
from configparser import ConfigParser
import logging
Usage Example
from configparser import ConfigParser
import logging

# Setup logging so the client's INFO/ERROR messages are visible
logging.basicConfig(level=logging.INFO)

# Create configuration; every option below is optional and has a fallback
config = ConfigParser()
config.add_section('api_keys')
config.set('api_keys', 'eu_clinical_trials_api_key', 'your_key_here')
config.set('api_keys', 'who_ictrp_username', 'your_username')
config.set('api_keys', 'who_ictrp_password', 'your_password')
config.add_section('rate_limits')
config.set('rate_limits', 'clinicaltrials_rate_limit', '3')
config.set('rate_limits', 'eu_clinical_trials_rate_limit', '1')
config.set('rate_limits', 'who_ictrp_rate_limit', '0.2')
config.set('rate_limits', 'opentrials_rate_limit', '2')

# Initialize client
client = ClinicalTrialsClient(config)

# Search a single source
results = client.search_clinicaltrials_gov('cancer immunotherapy', max_results=10, days_back=90)
for trial in results:
    print(f"Trial ID: {trial.get('NCTId')}, Title: {trial.get('BriefTitle')}")

# Search all sources
all_results = client.search_all('diabetes', max_results_per_source=5, days_back=30)
print(f"Total results from all sources: {len(all_results)}")
for trial in all_results:
    # ClinicalTrials.gov records are keyed by 'BriefTitle'; the other
    # sources store the title under 'title'.
    title = trial.get('title') or trial.get('BriefTitle') or 'N/A'
    print(f"Source: {trial['source']}, Title: {title}")
Best Practices
- Always provide a properly configured ConfigParser object with at least the rate_limits section to avoid excessive API calls
- API credentials are optional but recommended for full access to EU Clinical Trials and WHO ICTRP data
- The client automatically handles rate limiting - do not make concurrent requests with multiple instances to the same source
- Use the days_back parameter to limit result sets and reduce API load when recent data is sufficient
- Handle empty result lists gracefully as API errors or no matches will return []
- The search_all method is convenient but makes sequential calls to all sources - expect longer execution times
- Each search method updates internal rate limiting timestamps - reuse the same client instance for multiple searches
- Log messages are written at INFO and ERROR levels - configure logging appropriately for your application
- Network errors and API failures are caught and logged but do not raise exceptions - check return values
- Different sources return different field structures - always check for field existence before accessing
- The client maintains state for rate limiting - do not serialize/deserialize instances between requests
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
-
class LiteratureClient 64.7% similar
-
class PatentClient 63.3% similar
-
class CompanyNewsClient 60.2% similar
-
class LLMClient 49.3% similar
-
class FileCloudClient_v1 49.2% similar