class Config_v5
A hierarchical configuration manager that loads and manages settings from multiple sources (defaults, files, environment variables) with support for nested structures and dynamic updates.
/tf/active/vicechatdev/invoice_extraction/config.py
10 - 347
moderate
Purpose
The Config class provides centralized configuration management for an invoice extraction system. It implements a layered configuration approach where settings can be defined through defaults, YAML/JSON files, and environment variables, with each layer overriding the previous. The class supports nested configuration structures, type conversion, dynamic updates, and persistence. It's designed to handle application settings, logging configuration, LLM parameters, extractor settings, validators, and storage options in a unified, hierarchical manner.
Source Code
class Config:
    """
    Configuration manager for the invoice extraction system.

    Settings are merged from several layers, each overriding the previous:
    - Default configuration (``DEFAULT_CONFIG``)
    - Configuration files (YAML/JSON)
    - Environment variables

    Command-line arguments are not parsed by this class; values obtained
    elsewhere can be applied afterwards with ``set()`` or ``update()``.

    Configuration is hierarchical with nested structures.
    """

    # Default configuration settings. Treated as read-only: instances work on
    # a deep copy so that overrides never leak back into this class constant.
    DEFAULT_CONFIG = {
        # General settings
        "app": {
            "name": "Invoice Extraction System",
            "version": "1.0.0",
            "environment": "development",
            "debug": False
        },
        # Logging configuration
        "logging": {
            "log_level": "INFO",
            "log_to_file": False,
            "log_dir": "logs",
            "log_file": None,  # Auto-generated if None
            "json_logs": False,
            "json_fields": [
                "timestamp", "level", "logger", "message",
                "module", "correlation_id", "execution_time_ms"
            ]
        },
        # LLM configuration
        "llm": {
            "provider": "openai",
            "model": "gpt-4",
            "temperature": 0.0,
            "max_tokens": 4096,
            "api_key": None,  # Should be provided via env var or secure file
            "api_endpoint": None,  # For Azure or custom endpoints
            "timeout": 60,
            "max_retries": 3,
            "retry_min_wait": 1,
            "retry_max_wait": 10
        },
        # Extractor configuration
        "extractors": {
            "confidence_threshold": 0.7,
            "timeout": 30,
            "max_attempts": 2
        },
        # Validator configuration
        "validators": {
            "required_fields": {
                "invoice.number": "critical",
                "invoice.issue_date": "critical",
                "vendor.name": "critical",
                "amounts.total": "critical"
            },
            "uk_vat_rates": [20, 5, 0],
            "be_vat_rates": [21, 12, 6, 0],
            "au_gst_rate": 10
        },
        # Storage configuration
        "storage": {
            "type": "local",
            "path": "/tf/active/invoice_extraction/data/processed",
            "retention_days": 90
        }
    }

    def __init__(self, config_path: Optional[str] = None, env_prefix: str = "INVOICE_EXTRACTION"):
        """
        Initialize configuration.

        Layers are applied in order: defaults, then the file (if given),
        then environment variables.

        Args:
            config_path: Path to configuration file (YAML or JSON)
            env_prefix: Prefix for environment variables
        """
        self.config_path = config_path
        self.env_prefix = env_prefix

        # Start with default configuration. A deep copy is required: a shallow
        # copy would share the nested section dicts with DEFAULT_CONFIG, and
        # every later override would silently rewrite the class-level defaults.
        self.config = copy.deepcopy(self.DEFAULT_CONFIG)

        # Load configuration from file if provided
        if config_path:
            self._load_from_file(config_path)

        # Override with environment variables
        self._load_from_env()

        logger.debug(f"Configuration initialized: {len(self.config)} top-level keys")

    def _load_from_file(self, config_path: str) -> None:
        """
        Load configuration from file and merge it into the current settings.

        Problems (missing file, unsupported extension, parse errors, wrong
        top-level type) are logged and otherwise ignored, leaving the current
        configuration untouched.

        Args:
            config_path: Path to configuration file
        """
        if not os.path.exists(config_path):
            logger.warning(f"Configuration file not found: {config_path}")
            return

        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                if config_path.endswith(('.yaml', '.yml')):
                    file_config = yaml.safe_load(f)
                elif config_path.endswith('.json'):
                    file_config = json.load(f)
                else:
                    logger.warning(f"Unsupported configuration file format: {config_path}")
                    return

            # An empty YAML document parses to None: nothing to merge.
            if file_config is None:
                file_config = {}

            if not isinstance(file_config, dict):
                logger.error(
                    f"Error loading configuration from {config_path}: "
                    f"top-level value must be a mapping, got {type(file_config).__name__}"
                )
                return

            # Update configuration with file values
            self._deep_update(self.config, file_config)
            logger.info(f"Loaded configuration from {config_path}")
        except Exception as e:
            # Deliberately best-effort: a broken file must not prevent start-up.
            logger.error(f"Error loading configuration from {config_path}: {str(e)}")

    def _load_from_env(self) -> None:
        """
        Load configuration from environment variables.

        Environment variables should be in format:
            {ENV_PREFIX}__{SECTION}__{KEY}

        For example:
            INVOICE_EXTRACTION__LLM__API_KEY=sk-123456

        The path after the prefix is lower-cased before being applied, and the
        value is converted with ``_convert_value_type``.
        """
        env_prefix = f"{self.env_prefix}__"

        for env_var, value in os.environ.items():
            if not env_var.startswith(env_prefix):
                continue

            # Remove prefix and split into parts
            parts = env_var[len(env_prefix):].lower().split('__')

            # Need at least section + key, and no empty segment (which would
            # otherwise create a '' key in the configuration).
            if len(parts) < 2 or not all(parts):
                logger.warning(f"Ignoring malformed environment variable: {env_var}")
                continue

            # Convert value to appropriate type and update config
            self._set_nested_value(self.config, parts, self._convert_value_type(value))

        logger.debug("Loaded configuration from environment variables")

    def _convert_value_type(self, value: str) -> Any:
        """
        Convert string value to appropriate type.

        Checks are applied in order: boolean words, null words, int, float.
        Note that '1' and '0' are matched by the boolean check first and are
        therefore returned as True / False rather than as integers.

        Args:
            value: String value to convert

        Returns:
            Converted value (bool, None, int, float, or the original string)
        """
        lowered = value.lower()

        # Check for boolean
        if lowered in ('true', 'yes', '1'):
            return True
        if lowered in ('false', 'no', '0'):
            return False

        # Check for null/None
        if lowered in ('none', 'null'):
            return None

        # Check for integer, then float
        for caster in (int, float):
            try:
                return caster(value)
            except ValueError:
                continue

        # Return as string
        return value

    def _set_nested_value(self, config_dict: Dict[str, Any], keys: List[str], value: Any) -> None:
        """
        Set value in nested dictionary, creating intermediate levels as needed.

        An intermediate key that exists but does not hold a dict is replaced
        by an empty dict so the path can be created.

        Args:
            config_dict: Configuration dictionary (modified in place)
            keys: List of keys representing path in dictionary
            value: Value to set
        """
        current = config_dict

        # Navigate to the last level
        for key in keys[:-1]:
            if not isinstance(current.get(key), dict):
                current[key] = {}
            current = current[key]

        # Set the value at the last level
        current[keys[-1]] = value

    def _deep_update(self, original: Dict[str, Any], update: Dict[str, Any]) -> None:
        """
        Deep update dictionary with another dictionary.

        Dict values are merged recursively when both sides hold a dict; any
        other value (including lists) replaces the existing one.

        Args:
            original: Original dictionary to update (modified in place)
            update: Dictionary with updates
        """
        for key, value in update.items():
            if isinstance(value, dict) and isinstance(original.get(key), dict):
                # Recursively update dictionaries
                self._deep_update(original[key], value)
            else:
                # Replace or add value
                original[key] = value

    def get(self, key_path: str, default: Any = None) -> Any:
        """
        Get configuration value by key path.

        Args:
            key_path: Dot-separated path to configuration value (e.g., 'llm.model')
            default: Default value if key not found

        Returns:
            Configuration value, or ``default`` if any path segment is missing
        """
        value = self.config

        for key in key_path.split('.'):
            if isinstance(value, dict) and key in value:
                value = value[key]
            else:
                return default

        return value

    def get_section(self, section: str) -> Dict[str, Any]:
        """
        Get entire configuration section.

        Args:
            section: Section name (top-level key)

        Returns:
            Deep copy of the section (empty dict if the section is missing),
            so callers cannot modify internal state through nested values
        """
        return copy.deepcopy(self.config.get(section, {}))

    def set(self, key_path: str, value: Any) -> None:
        """
        Set configuration value.

        Args:
            key_path: Dot-separated path to configuration value
            value: Value to set
        """
        self._set_nested_value(self.config, key_path.split('.'), value)

    def update(self, updates: Dict[str, Any]) -> None:
        """
        Update configuration with dictionary (deep merge).

        Args:
            updates: Dictionary with updates
        """
        self._deep_update(self.config, updates)

    def save(self, file_path: Optional[str] = None) -> None:
        """
        Save configuration to file.

        Files ending in .yaml/.yml are written as YAML; everything else
        (including .json) is written as JSON. Errors are logged, not raised.

        Args:
            file_path: Path to save configuration (defaults to config_path)
        """
        save_path = file_path or self.config_path

        if not save_path:
            logger.warning("No file path provided for saving configuration")
            return

        try:
            # Create directory if it doesn't exist
            os.makedirs(os.path.dirname(os.path.abspath(save_path)), exist_ok=True)

            with open(save_path, 'w', encoding='utf-8') as f:
                if save_path.endswith(('.yaml', '.yml')):
                    yaml.dump(self.config, f, default_flow_style=False)
                else:
                    # .json, and default to JSON for any other extension
                    json.dump(self.config, f, indent=2)

            logger.info(f"Configuration saved to {save_path}")
        except Exception as e:
            logger.error(f"Error saving configuration to {save_path}: {str(e)}")

    def reload(self) -> None:
        """Reload configuration: reset to defaults, then re-apply file and environment."""
        # Reset to default (deep copy, so DEFAULT_CONFIG itself is never mutated)
        self.config = copy.deepcopy(self.DEFAULT_CONFIG)

        # Load from file
        if self.config_path:
            self._load_from_file(self.config_path)

        # Apply environment overrides
        self._load_from_env()

        logger.info("Configuration reloaded")

    def as_dict(self) -> Dict[str, Any]:
        """
        Get complete configuration as dictionary.

        Returns:
            Deep copy of the configuration dictionary
        """
        return copy.deepcopy(self.config)
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| bases | - | - | |
Parameter Details
config_path: Optional path to a YAML or JSON configuration file. If provided, settings from this file will override default configuration. The file is loaded during initialization and can be reloaded later. Supports both .yaml/.yml and .json extensions.
env_prefix: Prefix string used to identify relevant environment variables (default: 'INVOICE_EXTRACTION'). Environment variables matching the pattern '{env_prefix}__{SECTION}__{KEY}' will be parsed and used to override configuration values. For example, 'INVOICE_EXTRACTION__LLM__API_KEY' would set the LLM API key.
Return Value
Instantiation returns a Config object with fully initialized configuration. The get() method returns configuration values (Any type) or default if not found. The get_section() method returns a dictionary copy of a configuration section. The as_dict() method returns a complete copy of the configuration dictionary. The save() method returns None but persists configuration to disk.
Class Interface
Methods
__init__(self, config_path: Optional[str] = None, env_prefix: str = 'INVOICE_EXTRACTION') -> None
Purpose: Initialize the configuration manager by loading defaults, file configuration, and environment variables in hierarchical order
Parameters:
config_path: Optional path to YAML or JSON configuration file
env_prefix: Prefix for environment variable parsing (default: 'INVOICE_EXTRACTION')
Returns: None (constructor)
_load_from_file(self, config_path: str) -> None
Purpose: Load configuration from a YAML or JSON file and merge with existing configuration
Parameters:
config_path: Path to the configuration file to load
Returns: None (updates internal config dictionary)
_load_from_env(self) -> None
Purpose: Parse environment variables matching the prefix pattern and override configuration values
Returns: None (updates internal config dictionary)
_convert_value_type(self, value: str) -> Any
Purpose: Convert string values from environment variables to appropriate Python types (bool, int, float, None, or str)
Parameters:
value: String value to convert
Returns: Converted value as bool, int, float, None, or str
_set_nested_value(self, config_dict: Dict[str, Any], keys: List[str], value: Any) -> None
Purpose: Set a value in a nested dictionary structure, creating intermediate dictionaries as needed
Parameters:
config_dict: The dictionary to update
keys: List of keys representing the path to the target location
value: The value to set at the target location
Returns: None (modifies config_dict in place)
_deep_update(self, original: Dict[str, Any], update: Dict[str, Any]) -> None
Purpose: Recursively merge two dictionaries, updating nested structures while preserving unmodified keys
Parameters:
original: The original dictionary to update
update: The dictionary containing updates to apply
Returns: None (modifies original dictionary in place)
get(self, key_path: str, default: Any = None) -> Any
Purpose: Retrieve a configuration value using dot-separated path notation, returning a default if not found
Parameters:
key_path: Dot-separated path to the configuration value (e.g., 'llm.model')
default: Value to return if the key path is not found (default: None)
Returns: The configuration value at the specified path, or the default value if not found
get_section(self, section: str) -> Dict[str, Any]
Purpose: Retrieve an entire top-level configuration section as a dictionary copy
Parameters:
section: Name of the top-level configuration section (e.g., 'llm', 'logging')
Returns: A copy of the configuration section as a dictionary, or empty dict if not found
set(self, key_path: str, value: Any) -> None
Purpose: Set a configuration value using dot-separated path notation, creating nested structures as needed
Parameters:
key_path: Dot-separated path to the configuration value (e.g., 'llm.temperature')
value: The value to set at the specified path
Returns: None (updates internal configuration)
update(self, updates: Dict[str, Any]) -> None
Purpose: Update configuration with a dictionary, performing deep merge of nested structures
Parameters:
updates: Dictionary containing configuration updates to apply
Returns: None (updates internal configuration)
save(self, file_path: Optional[str] = None) -> None
Purpose: Persist the current configuration to a YAML or JSON file
Parameters:
file_path: Path where configuration should be saved (defaults to config_path if not provided)
Returns: None (writes configuration to disk)
reload(self) -> None
Purpose: Reset configuration to defaults and reload from file and environment variables
Returns: None (resets and reloads internal configuration)
as_dict(self) -> Dict[str, Any]
Purpose: Get a complete copy of the current configuration as a dictionary
Returns: A deep copy of the entire configuration dictionary
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
| DEFAULT_CONFIG | Dict[str, Any] | Class-level dictionary containing default configuration values for all sections (app, logging, llm, extractors, validators, storage) | class |
| config_path | Optional[str] | Path to the configuration file used during initialization, stored for reload and save operations | instance |
| env_prefix | str | Prefix used to identify relevant environment variables for configuration override | instance |
| config | Dict[str, Any] | The main configuration dictionary containing all merged settings from defaults, files, and environment variables | instance |
Dependencies
os, json, logging, yaml, pathlib, typing
Required Imports
import copy
import json
import logging
import os
from pathlib import Path
from typing import Dict, Any, Optional, List, Union

import yaml
Usage Example
# Basic instantiation with defaults
config = Config()
# Load from file with custom env prefix
config = Config(config_path='/path/to/config.yaml', env_prefix='MY_APP')
# Get configuration values
model = config.get('llm.model') # Returns 'gpt-4'
log_level = config.get('logging.log_level', 'WARNING') # With default
# Get entire section
llm_config = config.get_section('llm')
# Set configuration values
config.set('llm.temperature', 0.5)
config.set('app.debug', True)
# Update multiple values
config.update({
'llm': {'model': 'gpt-4-turbo', 'temperature': 0.3},
'logging': {'log_level': 'DEBUG'}
})
# Save configuration to file
config.save('/path/to/output.yaml')
# Reload from original source
config.reload()
# Get complete configuration
all_config = config.as_dict()
Best Practices
- Always use environment variables for sensitive data like API keys rather than storing them in configuration files
- Use the get() method with default values to handle missing configuration gracefully
- Call reload() if configuration files are modified externally during runtime
- Use get_section() instead of direct dictionary access to avoid modifying the internal configuration state
- Environment variables take precedence over file configuration, which takes precedence over defaults
- Use dot notation for nested keys (e.g., 'llm.model') rather than navigating dictionaries manually
- Save configuration files with appropriate extensions (.yaml, .yml, or .json) for proper serialization
- The configuration is loaded in order: defaults → file → environment variables, with each layer overriding the previous
- Use set() for single value updates and update() for bulk changes to maintain consistency
- The class creates deep copies when returning configuration data to prevent external modifications
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
-
class Config_v3 73.8% similar
-
function load_config 66.6% similar
-
class Config 64.7% similar
-
class Config_v1 60.2% similar
-
class Config_v4 57.8% similar