class LLMClient
A singleton client class for interacting with multiple LLM providers (OpenAI, Anthropic, Azure OpenAI, and local models) through a unified interface for text generation and structured data extraction.
File: /tf/active/vicechatdev/invoice_extraction/utils/llm_client.py
Lines: 14-461
Complexity: complex
Purpose
Provides a unified interface for working with different LLM providers, handling authentication, API calls, token tracking, retry logic, and structured data extraction. Implements a singleton pattern so that repeated construction with the same provider and model does not create duplicate clients. Each supported provider sits behind the same interface with consistent error handling, making it easy to switch between LLM services without changing application code.
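For example, constructing the client twice with the same provider and model yields the same object (a minimal sketch; the model names are illustrative and the relevant API keys are assumed to be set in the environment):
# Both calls resolve to the same underlying instance because the
# singleton key is "provider:model"
client_a = LLMClient({'provider': 'openai', 'model': 'gpt-4o'})
client_b = LLMClient({'provider': 'openai', 'model': 'gpt-4o'})
assert client_a is client_b
# A different provider:model combination gets its own instance
client_c = LLMClient({'provider': 'anthropic', 'model': 'claude-3-opus-20240229'})
assert client_c is not client_a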
Source Code
class LLMClient:
    """Client for interacting with LLM providers (OpenAI, Anthropic, Azure, etc.)"""

    # Class-level variable to track instances
    _instances = {}

    def __new__(cls, config=None):
        """
        Implement singleton pattern to prevent multiple initializations
        with the same configuration.
        """
        config = config or {}
        # Create a key from the core config parameters
        provider = config.get('provider', 'openai')
        model = config.get('model', 'gpt-4o')
        key = f"{provider}:{model}"
        # If an instance with this configuration exists, return it
        if key in cls._instances:
            return cls._instances[key]
        # Otherwise create a new instance
        instance = super(LLMClient, cls).__new__(cls)
        cls._instances[key] = instance
        return instance

    def __init__(self, config=None):
        """
        Initialize the LLM client with the provided configuration.

        Args:
            config: Dictionary with configuration parameters:
                - provider: 'openai', 'anthropic', 'azure', or 'local'
                - model: Model name to use
                - api_key: API key for the selected provider
                - temperature: Sampling temperature (0-1)
                - max_tokens: Maximum tokens in completion
                - (other provider-specific parameters)
        """
        config = config or {}
        # Check if this instance has already been initialized
        if hasattr(self, 'initialized') and self.initialized:
            return
        self.config = config
        self.provider = config.get('provider', 'openai')
        self.model = config.get('model', 'gpt-4o')
        self.temperature = config.get('temperature', 0.0)
        self.max_tokens = config.get('max_tokens', 1000)
        self.retry_attempts = config.get('retry_attempts', 3)
        self.retry_delay = config.get('retry_delay', 1)
        self.timeout = config.get('timeout', 60)
        # Track token usage and estimated cost
        self.total_prompt_tokens = 0
        self.total_completion_tokens = 0
        self.total_cost = 0.0
        # Initialize client based on provider
        if self.provider == 'openai':
            try:
                import openai
                api_key = config.get('api_key') or os.environ.get('OPENAI_API_KEY')
                if not api_key:
                    raise ValueError("OpenAI API key is required")
                self.client = openai.OpenAI(api_key=api_key)
            except ImportError:
                logger.error("OpenAI package not installed. Run 'pip install openai'")
                raise
        elif self.provider == 'anthropic':
            try:
                import anthropic
                api_key = config.get('api_key') or os.environ.get('ANTHROPIC_API_KEY')
                if not api_key:
                    raise ValueError("Anthropic API key is required")
                self.client = anthropic.Anthropic(api_key=api_key)
            except ImportError:
                logger.error("Anthropic package not installed. Run 'pip install anthropic'")
                raise
        elif self.provider == 'azure':
            try:
                from openai import AzureOpenAI
                api_key = config.get('api_key') or os.environ.get('AZURE_OPENAI_API_KEY')
                if not api_key:
                    raise ValueError("Azure OpenAI API key is required")
                api_endpoint = config.get('api_endpoint') or os.environ.get('AZURE_OPENAI_ENDPOINT')
                if not api_endpoint:
                    raise ValueError("Azure OpenAI API endpoint is required")
                api_version = config.get('api_version', '2023-05-15')
                self.azure_deployment = config.get('deployment')
                if not self.azure_deployment:
                    raise ValueError("Azure deployment name is required")
                self.client = AzureOpenAI(
                    api_key=api_key,
                    api_version=api_version,
                    azure_endpoint=api_endpoint
                )
            except ImportError:
                logger.error("OpenAI package not installed. Run 'pip install openai'")
                raise
        elif self.provider == 'local':
            import requests
            self.client = requests
            self._api_key = config.get('api_key') or os.environ.get('LOCAL_LLM_API_KEY')
            self._api_endpoint = config.get('api_endpoint', 'http://localhost:8000/v1/completions')
        else:
            raise ValueError(f"Unsupported provider: {self.provider}")
        logger.info(f"Initialized LLM client for provider: {self.provider}, model: {self.model}")
        self.initialized = True
    def _get_api_key(self) -> Optional[str]:
        """Get API key from config or environment variables."""
        # First check config
        api_key = self.config.get('api_key')
        # Then check environment variables based on provider
        if not api_key:
            if self.provider == 'openai':
                api_key = os.environ.get('OPENAI_API_KEY')
            elif self.provider == 'azure':
                api_key = os.environ.get('AZURE_OPENAI_API_KEY')
            elif self.provider == 'anthropic':
                api_key = os.environ.get('ANTHROPIC_API_KEY')
            elif self.provider == 'local':
                # Local models might not need API key
                api_key = os.environ.get('LOCAL_LLM_API_KEY')
        if not api_key and self.provider != 'local':
            logger.warning(f"No API key provided for {self.provider} provider")
        return api_key

    def _setup_provider(self):
        """Set up the specified LLM provider."""
        if self.provider == 'openai':
            try:
                import openai
                self.client = openai.OpenAI(api_key=self._api_key)
                if self._api_endpoint:
                    self.client.base_url = self._api_endpoint
            except ImportError:
                logger.error("OpenAI package not installed. Install with: pip install openai")
                self.client = None
        elif self.provider == 'azure':
            try:
                from openai import AzureOpenAI
                if not self._api_endpoint:
                    logger.error("Azure API endpoint required but not provided")
                    self.client = None
                    return
                if not self.azure_deployment:
                    logger.error("Azure deployment name required but not provided")
                    self.client = None
                    return
                self.client = AzureOpenAI(
                    api_key=self._api_key,
                    api_version=self.azure_api_version,
                    azure_endpoint=self._api_endpoint
                )
            except ImportError:
                logger.error("OpenAI package not installed. Install with: pip install openai")
                self.client = None
        elif self.provider == 'anthropic':
            try:
                import anthropic
                self.client = anthropic.Anthropic(api_key=self._api_key)
            except ImportError:
                logger.error("Anthropic package not installed. Install with: pip install anthropic")
                self.client = None
        elif self.provider == 'local':
            # For local models, we'll use direct API calls via requests
            self.client = requests
            # Set default endpoint if not provided
            if not self._api_endpoint:
                self._api_endpoint = "http://localhost:8000/v1/completions"
                logger.info(f"Using default local API endpoint: {self._api_endpoint}")
        else:
            logger.error(f"Unsupported provider: {self.provider}")
            self.client = None

    @retry(
        stop=stop_after_attempt(3),  # Retry 3 times
        wait=wait_exponential(multiplier=1, min=1, max=10),  # Exponential backoff
        retry=retry_if_exception_type(RequestException)
    )
    def generate(self, prompt: str, system_message: Optional[str] = None) -> str:
        """
        Generate text using the configured LLM.

        Args:
            prompt: The prompt to send to the LLM
            system_message: Optional system message (for models that support it)

        Returns:
            Generated text response from the LLM
        """
        if not self.client:
            logger.error(f"LLM client for {self.provider} not properly initialized")
            return "ERROR: LLM client not initialized"
        try:
            if self.provider == 'openai':
                return self._generate_openai(prompt, system_message)
            elif self.provider == 'azure':
                return self._generate_azure(prompt, system_message)
            elif self.provider == 'anthropic':
                return self._generate_anthropic(prompt, system_message)
            elif self.provider == 'local':
                return self._generate_local(prompt, system_message)
            else:
                logger.error(f"Generation not implemented for provider: {self.provider}")
                return "ERROR: Unsupported provider"
        except Exception as e:
            logger.error(f"Error generating text with {self.provider}: {str(e)}")
            # Re-raise to trigger retry
            raise

    def _generate_openai(self, prompt: str, system_message: Optional[str] = None) -> str:
        """Generate text using OpenAI's API."""
        messages = []
        # Add system message if provided
        if system_message:
            messages.append({"role": "system", "content": system_message})
        # Add user prompt
        messages.append({"role": "user", "content": prompt})
        start_time = time.time()
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            temperature=self.temperature,
            max_tokens=self.max_tokens
        )
        end_time = time.time()
        # Log timing and token usage
        logger.debug(f"OpenAI request took {end_time - start_time:.2f} seconds")
        # Update token counts if available
        if hasattr(response, 'usage'):
            self.total_prompt_tokens += response.usage.prompt_tokens
            self.total_completion_tokens += response.usage.completion_tokens
            logger.debug(f"Tokens: {response.usage.prompt_tokens} prompt, {response.usage.completion_tokens} completion")
        # Extract text from response
        return response.choices[0].message.content.strip()

    def _generate_azure(self, prompt: str, system_message: Optional[str] = None) -> str:
        """Generate text using Azure OpenAI's API."""
        messages = []
        # Add system message if provided
        if system_message:
            messages.append({"role": "system", "content": system_message})
        # Add user prompt
        messages.append({"role": "user", "content": prompt})
        start_time = time.time()
        response = self.client.chat.completions.create(
            model=self.azure_deployment,  # For Azure, we use deployment name
            messages=messages,
            temperature=self.temperature,
            max_tokens=self.max_tokens
        )
        end_time = time.time()
        # Log timing and token usage
        logger.debug(f"Azure OpenAI request took {end_time - start_time:.2f} seconds")
        # Update token counts if available
        if hasattr(response, 'usage'):
            self.total_prompt_tokens += response.usage.prompt_tokens
            self.total_completion_tokens += response.usage.completion_tokens
            logger.debug(f"Tokens: {response.usage.prompt_tokens} prompt, {response.usage.completion_tokens} completion")
        # Extract text from response
        return response.choices[0].message.content.strip()

    def _generate_anthropic(self, prompt: str, system_message: Optional[str] = None) -> str:
        """Generate text using Anthropic's API."""
        # Prepare system message or default
        system = system_message or "You are a helpful assistant specializing in extracting data from invoices."
        start_time = time.time()
        response = self.client.messages.create(
            model=self.model,
            system=system,
            messages=[{"role": "user", "content": prompt}],
            temperature=self.temperature,
            max_tokens=self.max_tokens
        )
        end_time = time.time()
        # Log timing
        logger.debug(f"Anthropic request took {end_time - start_time:.2f} seconds")
        # Update token counts if available
        if hasattr(response, 'usage'):
            input_tokens = getattr(response.usage, 'input_tokens', 0)
            output_tokens = getattr(response.usage, 'output_tokens', 0)
            self.total_prompt_tokens += input_tokens
            self.total_completion_tokens += output_tokens
            logger.debug(f"Tokens: {input_tokens} input, {output_tokens} output")
        # Extract text from response
        return response.content[0].text

    def _generate_local(self, prompt: str, system_message: Optional[str] = None) -> str:
        """Generate text using local LLM API."""
        # Prepare payload based on common local API formats
        # This is compatible with common local APIs like llama.cpp server
        payload = {
            "prompt": prompt,
            "temperature": self.temperature,
            "max_tokens": self.max_tokens
        }
        if system_message:
            # For local models that support system messages
            payload["system"] = system_message
        headers = {"Content-Type": "application/json"}
        if self._api_key:
            headers["Authorization"] = f"Bearer {self._api_key}"
        start_time = time.time()
        response = self.client.post(
            self._api_endpoint,
            headers=headers,
            data=json.dumps(payload),
            timeout=self.timeout
        )
        end_time = time.time()
        # Log timing
        logger.debug(f"Local LLM request took {end_time - start_time:.2f} seconds")
        # Ensure the request was successful
        response.raise_for_status()
        # Parse the response
        result = response.json()
        # Extract text based on common response formats
        if "choices" in result and len(result["choices"]) > 0:
            if "text" in result["choices"][0]:
                return result["choices"][0]["text"].strip()
            elif "message" in result["choices"][0]:
                return result["choices"][0]["message"]["content"].strip()
        # Fallback for other formats
        if "response" in result:
            return result["response"].strip()
        # If we can't determine the output format, return the raw response
        logger.warning("Couldn't extract text from local LLM response, returning raw JSON")
        return json.dumps(result)
    def extract_structured_data(self, text: str, schema: Dict[str, Any],
                                system_message: Optional[str] = None) -> Dict[str, Any]:
        """
        Extract structured data from text using the LLM.

        Args:
            text: Text to extract data from
            schema: JSON schema describing the expected output structure
            system_message: Optional system message

        Returns:
            Structured data as dictionary
        """
        # Convert schema to string representation
        schema_str = json.dumps(schema, indent=2)
        # Create a prompt that instructs the model to extract structured data
        prompt = f"""Extract the following structured information from this text:
{text}
Return the data as a valid JSON object with this schema:
{schema_str}
Only return the JSON object, nothing else.
"""
        # Default system message if none provided
        if not system_message:
            system_message = "You are a data extraction assistant. Extract data from text into structured JSON format."
        # Generate the response
        response = self.generate(prompt, system_message)
        try:
            # Try to extract JSON from the response
            # First, look for code blocks
            json_match = re.search(r'```(?:json)?\s*([\s\S]*?)\s*```', response)
            if json_match:
                json_str = json_match.group(1)
            else:
                # If no code blocks, use the entire response
                json_str = response
            # Parse the JSON
            data = json.loads(json_str)
            return data
        except json.JSONDecodeError:
            logger.error("Failed to parse structured data response as JSON")
            return {"error": "Failed to parse response", "raw_response": response}

    def is_available(self) -> bool:
        """Check if the LLM client is properly initialized and available."""
        return self.client is not None

    def get_usage_stats(self) -> Dict[str, Any]:
        """Get token usage statistics."""
        return {
            "prompt_tokens": self.total_prompt_tokens,
            "completion_tokens": self.total_completion_tokens,
            "total_tokens": self.total_prompt_tokens + self.total_completion_tokens,
            "estimated_cost": self.total_cost,
            "provider": self.provider,
            "model": self.model
        }
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| bases | - | - | - |
Parameter Details
config: Optional dictionary of configuration parameters. If not provided, defaults to OpenAI with the gpt-4o model. Key fields:
- provider (str): 'openai', 'anthropic', 'azure', or 'local'
- model (str): model name, e.g. 'gpt-4o'
- api_key (str): API key for the selected provider
- temperature (float): sampling temperature, 0-1
- max_tokens (int): maximum tokens in the completion
- retry_attempts (int): number of retry attempts
- retry_delay (int): delay between retries in seconds
- api_endpoint (str): custom API endpoint; required for Azure and local providers
- deployment (str): Azure deployment name; required for Azure
- api_version (str): Azure API version, defaults to '2023-05-15'
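For illustration, a configuration for the 'local' provider might look like this (the model name is a placeholder; the endpoint shown matches the class default for local servers):
local_config = {
    'provider': 'local',
    'model': 'llama-3-8b-instruct',   # informational; not included in the local request payload
    'api_endpoint': 'http://localhost:8000/v1/completions',
    'temperature': 0.0,
    'max_tokens': 1000,
    'retry_attempts': 3,
    'retry_delay': 1
}
local_client = LLMClient(local_config)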
Return Value
Instantiation returns a singleton LLMClient instance configured for the specified provider. The generate() method returns a string containing the LLM's text response. The extract_structured_data() method returns a dictionary containing structured data extracted according to the provided schema. The get_usage_stats() method returns a dictionary with token usage statistics including prompt_tokens, completion_tokens, total_tokens, estimated_cost, provider, and model.
Class Interface
Methods
__new__(cls, config=None) -> LLMClient
Purpose: Implements singleton pattern to return existing instance if one exists with the same provider:model configuration
Parameters:
config: Optional configuration dictionary with provider and model keys
Returns: LLMClient instance (existing or new)
__init__(self, config=None) -> None
Purpose: Initializes the LLM client with provider-specific configuration, sets up API clients, and initializes token tracking
Parameters:
config: Optional dictionary with provider, model, api_key, temperature, max_tokens, retry_attempts, retry_delay, and provider-specific parameters
Returns: None
_get_api_key(self) -> Optional[str]
Purpose: Retrieves API key from config or environment variables based on provider
Returns: API key string if found, None otherwise
_setup_provider(self) -> None
Purpose: Sets up the provider-specific client instance (OpenAI, Azure, Anthropic, or local)
Returns: None
generate(self, prompt: str, system_message: Optional[str] = None) -> str
Purpose: Generates text using the configured LLM with automatic retry logic and token tracking
Parameters:
prompt: The text prompt to send to the LLM
system_message: Optional system message for context (supported by most providers)
Returns: Generated text response from the LLM as a string
_generate_openai(self, prompt: str, system_message: Optional[str] = None) -> str
Purpose: Internal method to generate text using OpenAI's chat completions API
Parameters:
prompt: User prompt text
system_message: Optional system message
Returns: Generated text from OpenAI
_generate_azure(self, prompt: str, system_message: Optional[str] = None) -> str
Purpose: Internal method to generate text using Azure OpenAI's API with deployment name
Parameters:
prompt: User prompt text
system_message: Optional system message
Returns: Generated text from Azure OpenAI
_generate_anthropic(self, prompt: str, system_message: Optional[str] = None) -> str
Purpose: Internal method to generate text using Anthropic's Claude API
Parameters:
prompt: User prompt text
system_message: Optional system message
Returns: Generated text from Anthropic Claude
_generate_local(self, prompt: str, system_message: Optional[str] = None) -> str
Purpose: Internal method to generate text using local LLM API endpoint
Parameters:
prompt: User prompt text
system_message: Optional system message
Returns: Generated text from local LLM
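For reference, the request body _generate_local posts to the configured endpoint, and the response shapes it can parse, look roughly like this (the prompt text is illustrative; the field names follow the listed source):
# Payload posted to the local endpoint ("system" is included only when a
# system message was supplied)
payload = {
    "prompt": "Extract the totals from this invoice ...",
    "temperature": 0.0,
    "max_tokens": 1000,
    "system": "You are a helpful assistant ..."
}
# Accepted response shapes, checked in order:
#   {"choices": [{"text": "..."}]}
#   {"choices": [{"message": {"content": "..."}}]}
#   {"response": "..."}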
extract_structured_data(self, text: str, schema: Dict[str, Any], system_message: Optional[str] = None) -> Dict[str, Any]
Purpose: Extracts structured data from text according to a JSON schema using the LLM
Parameters:
text: Text to extract data from
schema: JSON schema dictionary describing the expected output structure
system_message: Optional system message for extraction context
Returns: Dictionary containing extracted structured data, or error dictionary if parsing fails
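Because a parsing failure returns an error dictionary rather than raising, callers may want to check for it. A minimal sketch (invoice_text, invoice_schema, and handle_invoice are placeholder names):
result = client.extract_structured_data(invoice_text, invoice_schema)
if 'error' in result:
    # JSON parsing failed; the raw LLM output is kept under 'raw_response'
    print(f"Extraction failed: {result['raw_response']}")
else:
    handle_invoice(result)  # hypothetical downstream handler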
is_available(self) -> bool
Purpose: Checks if the LLM client is properly initialized and ready to use
Returns: True if client is initialized, False otherwise
get_usage_stats(self) -> Dict[str, Any]
Purpose: Returns token usage statistics and cost estimates for all API calls made by this instance
Returns: Dictionary with prompt_tokens, completion_tokens, total_tokens, estimated_cost, provider, and model
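The returned dictionary has the following shape (values are illustrative):
stats = client.get_usage_stats()
# Example:
# {
#     'prompt_tokens': 1523,
#     'completion_tokens': 412,
#     'total_tokens': 1935,
#     'estimated_cost': 0.0,
#     'provider': 'openai',
#     'model': 'gpt-4o'
# }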
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
| _instances | Dict[str, LLMClient] | Class-level dictionary tracking singleton instances by provider:model key | class |
| provider | str | LLM provider name ('openai', 'anthropic', 'azure', or 'local') | instance |
| model | str | Model name to use (e.g., 'gpt-4o', 'claude-3-opus') | instance |
| temperature | float | Sampling temperature for generation (0.0 to 1.0) | instance |
| max_tokens | int | Maximum number of tokens in completion | instance |
| retry_attempts | int | Number of retry attempts for failed API calls | instance |
| retry_delay | int | Delay in seconds between retry attempts | instance |
| total_prompt_tokens | int | Cumulative count of prompt tokens used across all API calls | instance |
| total_completion_tokens | int | Cumulative count of completion tokens used across all API calls | instance |
| client | openai.OpenAI, AzureOpenAI, anthropic.Anthropic, or the requests module | Provider-specific client used to make API calls | instance |
| azure_deployment | str | Azure deployment name (only set when provider is 'azure') | instance |
| _api_endpoint | str | API endpoint URL (set for the local provider; the Azure endpoint is passed directly to the AzureOpenAI client) | instance |
| initialized | bool | Flag indicating whether the instance has been fully initialized | instance |
Dependencies
logging, os, json, time, typing, copy, requests, tenacity, re, openai, anthropic
Required Imports
import logging
import os
import json
import time
from typing import Dict, Any, Optional, List
import copy
import requests
from requests.exceptions import RequestException
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
import re
Conditional/Optional Imports
These imports are only needed under specific conditions:
import openai
Condition: only if provider is 'openai'. Optional.
from openai import AzureOpenAI
Condition: only if provider is 'azure'. Optional.
import anthropic
Condition: only if provider is 'anthropic'. Optional.
Usage Example
# Basic usage with OpenAI
import os
os.environ['OPENAI_API_KEY'] = 'your-api-key'

config = {
    'provider': 'openai',
    'model': 'gpt-4o',
    'temperature': 0.7,
    'max_tokens': 1000
}
client = LLMClient(config)

# Generate text
response = client.generate(
    prompt='Explain quantum computing in simple terms',
    system_message='You are a helpful science teacher'
)
print(response)

# Extract structured data
schema = {
    'type': 'object',
    'properties': {
        'name': {'type': 'string'},
        'age': {'type': 'integer'},
        'email': {'type': 'string'}
    }
}
text = 'John Doe is 30 years old. His email is john@example.com'
data = client.extract_structured_data(text, schema)
print(data)

# Check usage statistics
stats = client.get_usage_stats()
print(f"Total tokens used: {stats['total_tokens']}")

# Using with Azure
azure_config = {
    'provider': 'azure',
    'model': 'gpt-4',
    'deployment': 'my-gpt4-deployment',
    'api_key': 'azure-key',
    'api_endpoint': 'https://my-resource.openai.azure.com/',
    'api_version': '2023-05-15'
}
azure_client = LLMClient(azure_config)
response = azure_client.generate('Hello, world!')
Best Practices
- The class implements singleton pattern - multiple instantiations with the same provider:model combination return the same instance
- Always set API keys via environment variables or config parameter before instantiation
- For Azure provider, both deployment name and api_endpoint are required
- The generate() method includes automatic retry logic with exponential backoff for network errors
- Token usage is tracked automatically in total_prompt_tokens and total_completion_tokens attributes
- Use extract_structured_data() for reliable JSON extraction from LLM responses
- Check is_available() before making API calls to ensure client is properly initialized
- The class handles JSON extraction from code blocks automatically in extract_structured_data()
- System messages are optional but recommended for better context control
- Local provider expects a compatible API endpoint (llama.cpp server format)
- Instance is marked as initialized after first __init__ call to prevent re-initialization
- Use get_usage_stats() to monitor token consumption and estimated costs
- Temperature should be between 0 (deterministic) and 1 (creative)
- Handle potential exceptions from the generate() method, as retries may eventually fail; see the sketch after this list
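A minimal defensive pattern that combines several of these practices (the prompt, system message, and error handling shown here are illustrative, not part of the class):
client = LLMClient({'provider': 'openai', 'model': 'gpt-4o'})
if not client.is_available():
    raise RuntimeError("LLM client failed to initialize")

try:
    answer = client.generate(
        prompt='List the line items on this invoice.',
        system_message='You are an invoice extraction assistant.'
    )
except Exception as exc:
    # Raised only after the retry attempts are exhausted
    # (or immediately for non-retryable errors)
    print(f"Generation failed: {exc}")
    answer = None

stats = client.get_usage_stats()
print(f"Tokens used so far: {stats['total_tokens']}")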
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
- class LLMClient_v2 (85.7% similar)
- class LLMClient_v1 (85.3% similar)
- class LLMClient_v1 (81.3% similar)
- class TestLLMClient (80.2% similar)
- function get_llm_instance (75.6% similar)