class PatentClient
A client class for searching and retrieving patent data from multiple sources including USPTO, EPO (European Patent Office), and The Lens API.
/tf/active/vicechatdev/QA_updater/data_access/patent_client.py
14 - 277
moderate
Purpose
PatentClient provides a unified interface for querying patent databases across three major patent data sources. It handles API authentication, rate limiting, and data normalization. The class manages credentials for each API, enforces rate limits to prevent API throttling, and returns standardized patent metadata dictionaries. It's designed for applications that need to search patent databases programmatically, such as prior art searches, competitive intelligence, or patent monitoring systems.
Source Code
class PatentClient:
    """Client for accessing patent data from multiple sources.

    Wraps three HTTP search endpoints (USPTO, EPO and The Lens) behind a
    common interface. Each ``search_*`` method throttles itself using a
    per-source "requests per second" setting, performs one GET request and
    converts the JSON payload into a list of flat metadata dictionaries.
    Request failures are logged and reported as an empty list.

    NOTE(review): endpoint URLs, authentication schemes and response schemas
    are kept exactly as originally written; verify them against the current
    provider documentation.
    """

    # Seconds to wait for an HTTP response. Without an explicit timeout
    # ``requests`` may block indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 30

    def __init__(self, config: ConfigParser):
        """Initialize the client with required credentials.

        Args:
            config: Parser providing an ``api_keys`` section (``uspto_api_key``,
                ``epo_consumer_key``, ``epo_consumer_secret``, ``lens_api_key``)
                and a ``rate_limits`` section (``uspto_rate_limit``,
                ``epo_rate_limit``, ``lens_rate_limit``, in requests per
                second). Every option is optional; missing keys become
                ``None`` and missing rate limits fall back to 5, 2 and 1.
        """
        self.logger = logging.getLogger(__name__)
        self.config = config

        # API keys
        self.uspto_api_key = self.config.get('api_keys', 'uspto_api_key', fallback=None)
        self.epo_consumer_key = self.config.get('api_keys', 'epo_consumer_key', fallback=None)
        self.epo_consumer_secret = self.config.get('api_keys', 'epo_consumer_secret', fallback=None)
        self.lens_api_key = self.config.get('api_keys', 'lens_api_key', fallback=None)

        # Rate limiting configurations (requests per second)
        self.uspto_rate_limit = float(self.config.get('rate_limits', 'uspto_rate_limit', fallback=5))
        self.epo_rate_limit = float(self.config.get('rate_limits', 'epo_rate_limit', fallback=2))
        self.lens_rate_limit = float(self.config.get('rate_limits', 'lens_rate_limit', fallback=1))

        # Timestamps of the most recent request per source; 0 means "never",
        # so the first call is not delayed.
        self.last_uspto_request = 0
        self.last_epo_request = 0
        self.last_lens_request = 0

        self.logger.info("PatentClient initialized.")

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _throttle(last_request: float, rate_limit: float) -> None:
        """Sleep until at least ``1 / rate_limit`` seconds passed since ``last_request``.

        A rate limit of zero or below disables throttling instead of raising
        ``ZeroDivisionError``.
        """
        if rate_limit <= 0:
            return
        min_interval = 1.0 / rate_limit
        elapsed = time.time() - last_request
        if elapsed < min_interval:
            time.sleep(min_interval - elapsed)

    @staticmethod
    def _redact(error: Any, secrets: Any = ()) -> str:
        """Return ``str(error)`` with every non-empty secret replaced by ``***``."""
        text = str(error)
        for secret in secrets:
            if secret:
                text = text.replace(str(secret), "***")
        return text

    def _get_json(self, source: str, url: str, secrets: Any = (), **request_kwargs: Any) -> Any:
        """Perform a GET request and return the decoded JSON body.

        Args:
            source: Label used in log messages (e.g. ``"USPTO"``).
            url: Endpoint to query.
            secrets: Values that must not appear in logged error messages.
            **request_kwargs: Forwarded to ``requests.get``.

        Returns:
            The decoded JSON payload, or ``None`` if the request failed.
        """
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT, **request_kwargs)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            # HTTP error messages embed the full URL, query string included.
            self.logger.error(f"{source} API error: {self._redact(e, secrets)}")
        except Exception as e:
            # NOTE: the traceback emitted by logger.exception is not redacted.
            self.logger.exception(f"Error during {source} API request: {self._redact(e, secrets)}")
        return None

    @staticmethod
    def _nested(mapping: Any, key: str, attr: str) -> Any:
        """Return ``mapping[key][attr]`` or ``None`` when any level is missing or not a dict."""
        inner = mapping.get(key) if isinstance(mapping, dict) else None
        return inner.get(attr) if isinstance(inner, dict) else None

    @staticmethod
    def _parse_uspto_results(data: Any) -> List[Dict[str, Any]]:
        """Convert a USPTO payload into patent dictionaries (empty list if malformed)."""
        if not isinstance(data, dict):
            return []
        patents = []
        for result in data.get("results") or []:
            if not isinstance(result, dict):
                continue
            patents.append({
                "source": "uspto",
                "application_id": result.get("applId"),
                "invention_title": result.get("inventionTitle"),
                "applicant_name": result.get("applicantName"),
                "file_date": result.get("fileDate"),
                "patent_number": result.get("patentNumber"),
                "abstract": result.get("abstract"),
                # Prefer the granted patent number, fall back to the application id.
                "url": f"https://patents.google.com/patent/{result.get('patentNumber') or result.get('applId')}",
            })
        return patents

    @staticmethod
    def _parse_epo_results(data: Any) -> List[Dict[str, Any]]:
        """Convert an EPO payload into patent dictionaries (empty list if malformed)."""
        if not isinstance(data, dict):
            return []
        # The hit list lives under results -> result. The container must
        # default to a dict: a list default has no .get() and would raise.
        container = data.get("results") or {}
        hits = container.get("result") if isinstance(container, dict) else None
        patents = []
        for result in hits or []:
            if not isinstance(result, dict):
                continue
            patent_data = result.get("bibliographic-data") or {}
            number = PatentClient._nested(patent_data, "publicationNumber", "@number")
            patents.append({
                "source": "epo",
                "publication_number": number,
                "title": PatentClient._nested(patent_data, "title", "@title"),
                "abstract": PatentClient._nested(patent_data, "abstract", "@p"),
                "filing_date": PatentClient._nested(patent_data, "filingDate", "@date"),
                "priority_date": PatentClient._nested(patent_data, "priorityDate", "@date"),
                "url": f"https://worldwide.espacenet.com/patent/search?q={number}",
            })
        return patents

    @staticmethod
    def _parse_lens_results(data: Any) -> List[Dict[str, Any]]:
        """Convert a Lens payload into patent dictionaries (empty list if malformed)."""
        if not isinstance(data, dict):
            return []
        patents = []
        for hit in data.get("hits") or []:
            if not isinstance(hit, dict):
                continue
            doc = hit.get("_source") or {}
            patents.append({
                "source": "lens",
                "lens_id": doc.get("lens_id"),
                "title": doc.get("title"),
                "abstract": doc.get("abstract"),
                "publication_date": doc.get("date_published"),
                "priority_date": doc.get("priority_date"),
                "url": f"https://www.lens.org/lens/patent/{doc.get('lens_id')}",
            })
        return patents

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def search_uspto(self, query: str, max_results: int = 10, days_back: int = 365) -> List[Dict[str, Any]]:
        """
        Search USPTO for patents matching the query.

        Args:
            query: Search terms
            max_results: Maximum number of results to return
            days_back: Only include patents from this many days ago

        Returns:
            List of patent metadata dictionaries; empty if the request fails.
        """
        # Calculate date range for filtering
        date_from = (datetime.now() - timedelta(days=days_back)).strftime("%Y%m%d")

        # USPTO API endpoint
        url = "https://developer.uspto.gov/pteas/v1/applications"

        # Request parameters
        params = {
            "q": f"applText:({query}) AND fileDate:[{date_from} TO 30000101]",
            "rows": max_results,
        }

        # The API key header is only sent when a key is configured.
        headers = {}
        if self.uspto_api_key:
            headers["apikey"] = self.uspto_api_key

        self._throttle(self.last_uspto_request, self.uspto_rate_limit)
        try:
            data = self._get_json("USPTO", url, params=params, headers=headers)
        finally:
            # Record the attempt even when it failed so that immediate
            # retries are still rate limited.
            self.last_uspto_request = time.time()

        return self._parse_uspto_results(data)

    def search_epo(self, query: str, max_results: int = 10, days_back: int = 365) -> List[Dict[str, Any]]:
        """
        Search European Patent Office (EPO) for patents matching the query.

        Args:
            query: Search terms
            max_results: Maximum number of results to return
            days_back: Only include patents from this many days ago

        Returns:
            List of patent metadata dictionaries; empty if credentials are
            missing or the request fails.
        """
        # EPO API requires authentication
        if not (self.epo_consumer_key and self.epo_consumer_secret):
            self.logger.warning("EPO API credentials not provided. Skipping EPO search.")
            return []

        # Calculate date range for filtering
        date_from = (datetime.now() - timedelta(days=days_back)).strftime("%Y%m%d")

        # EPO API endpoint
        url = "https://api.epo.org/rest-services/published-data/search"

        # Request parameters
        params = {
            "q": f"pd>={date_from} AND TXT=({query})",
            "Range": f"1-{max_results}",
        }

        # Credentials are passed as a (key, secret) pair via the ``auth`` argument.
        auth = (self.epo_consumer_key, self.epo_consumer_secret)
        headers = {
            "Accept": "application/json",
        }

        self._throttle(self.last_epo_request, self.epo_rate_limit)
        try:
            data = self._get_json("EPO", url, params=params, auth=auth, headers=headers)
        finally:
            self.last_epo_request = time.time()

        return self._parse_epo_results(data)

    def search_lens(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
        """
        Search The Lens API for patents matching the query.

        Args:
            query: Search terms
            max_results: Maximum number of results to return

        Returns:
            List of patent metadata dictionaries; empty if the API key is
            missing or the request fails.
        """
        # Lens API requires authentication
        if not self.lens_api_key:
            self.logger.warning("Lens API key not provided. Skipping Lens search.")
            return []

        # Lens API endpoint
        url = "https://api.lens.org/lens/search"

        # Request parameters; the key is part of the query string, so it is
        # redacted from any logged error message below.
        params = {
            "q": query,
            "size": max_results,
            "apikey": self.lens_api_key,
        }

        self._throttle(self.last_lens_request, self.lens_rate_limit)
        try:
            data = self._get_json("Lens", url, secrets=(self.lens_api_key,), params=params)
        finally:
            self.last_lens_request = time.time()

        return self._parse_lens_results(data)

    def search_all(self, query: str, max_results_per_source: int = 5, days_back: int = 365) -> List[Dict[str, Any]]:
        """
        Search all configured patent sources.

        Args:
            query: Search terms
            max_results_per_source: Maximum results to return per source
            days_back: Only include patents from this many days ago
                (passed to the USPTO and EPO searches; the Lens search does
                not take it)

        Returns:
            Combined list of results from all sources, in the order USPTO,
            EPO, Lens. A source that raises is logged and skipped.
        """
        searches = (
            ("USPTO", lambda: self.search_uspto(query, max_results_per_source, days_back)),
            ("EPO", lambda: self.search_epo(query, max_results_per_source, days_back)),
            ("Lens", lambda: self.search_lens(query, max_results_per_source)),
        )

        results = []
        for label, run_search in searches:
            try:
                found = run_search()
                results.extend(found)
                self.logger.info(f"Retrieved {len(found)} results from {label}")
            except Exception as e:
                # One failing source must not discard the others' results.
                self.logger.error(f"{label} search error: {e}")
        return results
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
bases |
- | - |
Parameter Details
config: A ConfigParser object containing API credentials and rate limit configurations. Expected sections: 'api_keys' (with uspto_api_key, epo_consumer_key, epo_consumer_secret, lens_api_key) and 'rate_limits' (with uspto_rate_limit, epo_rate_limit, lens_rate_limit as requests per second). All keys are optional with fallback to None or default rate limits.
Return Value
Instantiation returns a PatentClient object. Search methods (search_uspto, search_epo, search_lens, search_all) return List[Dict[str, Any]] containing patent metadata dictionaries. Each dictionary includes source-specific fields like application_id, title, abstract, dates, and URLs. Returns empty list on API errors or missing credentials.
Class Interface
Methods
__init__(self, config: ConfigParser)
Purpose: Initialize the PatentClient with API credentials and rate limiting configuration
Parameters:
config: ConfigParser object containing 'api_keys' and 'rate_limits' sections with API credentials and rate limit values
Returns: None (constructor)
search_uspto(self, query: str, max_results: int = 10, days_back: int = 365) -> List[Dict[str, Any]]
Purpose: Search the USPTO patent database for patents matching the query string
Parameters:
query: Search terms to query in patent application text
max_results: Maximum number of results to return (default: 10)
days_back: Only include patents filed within this many days ago (default: 365)
Returns: List of dictionaries containing USPTO patent metadata including application_id, invention_title, applicant_name, file_date, patent_number, abstract, and url. Returns empty list on error.
search_epo(self, query: str, max_results: int = 10, days_back: int = 365) -> List[Dict[str, Any]]
Purpose: Search the European Patent Office (EPO) database for patents matching the query
Parameters:
query: Search terms to query in patent text
max_results: Maximum number of results to return (default: 10)
days_back: Only include patents published within this many days ago (default: 365)
Returns: List of dictionaries containing EPO patent metadata including publication_number, title, abstract, filing_date, priority_date, and url. Returns empty list if credentials missing or on error.
search_lens(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]
Purpose: Search The Lens API for patents matching the query string
Parameters:
query: Search terms to query in patent data
max_results: Maximum number of results to return (default: 10)
Returns: List of dictionaries containing Lens patent metadata including lens_id, title, abstract, publication_date, priority_date, and url. Returns empty list if API key missing or on error.
search_all(self, query: str, max_results_per_source: int = 5, days_back: int = 365) -> List[Dict[str, Any]]
Purpose: Search all configured patent sources (USPTO, EPO, Lens) and combine results
Parameters:
query: Search terms to query across all sources
max_results_per_source: Maximum results to return from each source (default: 5)
days_back: Only include patents from this many days ago for USPTO and EPO (default: 365)
Returns: Combined list of patent metadata dictionaries from all sources. Each dictionary includes a 'source' field indicating origin (uspto, epo, or lens). Returns partial results if some sources fail.
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
| logger | logging.Logger | Logger instance for recording API operations and errors | instance |
| config | ConfigParser | Configuration object containing API credentials and settings | instance |
| uspto_api_key | Optional[str] | API key for USPTO patent database access | instance |
| epo_consumer_key | Optional[str] | Consumer key for EPO API OAuth authentication | instance |
| epo_consumer_secret | Optional[str] | Consumer secret for EPO API OAuth authentication | instance |
| lens_api_key | Optional[str] | API key for The Lens patent database access | instance |
| uspto_rate_limit | float | Maximum requests per second allowed for USPTO API (default: 5.0) | instance |
| epo_rate_limit | float | Maximum requests per second allowed for EPO API (default: 2.0) | instance |
| lens_rate_limit | float | Maximum requests per second allowed for Lens API (default: 1.0) | instance |
| last_uspto_request | float | Timestamp of the last USPTO API request for rate limiting enforcement | instance |
| last_epo_request | float | Timestamp of the last EPO API request for rate limiting enforcement | instance |
| last_lens_request | float | Timestamp of the last Lens API request for rate limiting enforcement | instance |
Dependencies
time, requests, json, os, pandas, typing, datetime, bs4, urllib.parse, configparser, logging
Required Imports
import time
import requests
import json
import os
import pandas as pd
from typing import List, Dict, Any, Optional
from datetime import datetime, timedelta
from bs4 import BeautifulSoup
import urllib.parse
from configparser import ConfigParser
import logging
Usage Example
from configparser import ConfigParser
import logging

# Setup logging so the client's info/warning/error messages are visible
logging.basicConfig(level=logging.INFO)

# Create configuration; only USPTO and Lens credentials are supplied here,
# so the EPO search is skipped with a warning.
config = ConfigParser()
config.add_section('api_keys')
config.set('api_keys', 'uspto_api_key', 'your_uspto_key')
config.set('api_keys', 'lens_api_key', 'your_lens_key')
config.add_section('rate_limits')
config.set('rate_limits', 'uspto_rate_limit', '5')
config.set('rate_limits', 'lens_rate_limit', '1')

# Initialize client
client = PatentClient(config)

# Search USPTO for recent patents
uspto_results = client.search_uspto('artificial intelligence', max_results=10, days_back=365)
for patent in uspto_results:
    print(f"Title: {patent['invention_title']}")
    print(f"URL: {patent['url']}")

# Search all sources
all_results = client.search_all('machine learning', max_results_per_source=5, days_back=180)
print(f"Total results from all sources: {len(all_results)}")
Best Practices
- Always provide a properly configured ConfigParser object with at least one set of API credentials
- The class automatically handles rate limiting - do not make concurrent requests with multiple instances using the same credentials
- Check return values for empty lists which indicate API errors or missing credentials
- EPO and Lens searches require valid credentials; USPTO can work without an API key but may have lower rate limits
- Use search_all() for comprehensive searches across all sources, or individual search methods for targeted queries
- The days_back parameter filters results by date for USPTO and EPO; Lens does not support this parameter
- Rate limit timestamps are instance-specific; creating multiple instances may violate API rate limits
- Handle exceptions when calling search methods as network errors may occur
- Patent metadata structure varies by source; check the 'source' field to determine available fields
- URLs in results point to Google Patents, Espacenet, or Lens.org for easy access to full patent documents
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
-
class LiteratureClient 66.4% similar
-
class ClinicalTrialsClient 63.3% similar
-
class CompanyNewsClient 56.8% similar
-
class FileCloudClient_v1 51.4% similar
-
class LLMClient_v2 49.2% similar