class UKValidator
UK-specific invoice data validator that extends BaseValidator to implement validation rules specific to UK invoices including VAT number format, UK addresses, VAT rates, and banking details.
/tf/active/vicechatdev/invoice_extraction/validators/uk_validator.py
10 - 272
moderate
Purpose
This class provides comprehensive validation for UK invoices by checking UK-specific requirements such as VAT number format (GB followed by 9 or 12 digits), UK postcode patterns, standard UK VAT rates (20%, 5%, 0%), UK banking details (sort codes and account numbers), GBP currency validation, and VAT calculation consistency. It extends BaseValidator to add UK-specific required fields and validation logic while maintaining the base validation framework.
Source Code
class UKValidator(BaseValidator):
    """
    UK-specific invoice data validator.

    Implements validation rules specific to UK invoices:
    - VAT number format validation (GB followed by 9 or 12 digits)
    - UK address verification (postcode and country terms)
    - VAT rate check against the configured UK rates
    - UK banking details (sort code and account number)
    - Currency check (GBP)
    - Consistency of the VAT amount with subtotal and VAT rate

    All findings are reported through ``result.add_issue`` with severity
    ``'warning'``; none of the checks return a value.
    """

    # Unanchored, word-bounded postcode pattern used to locate a postcode
    # inside a free-text address. The anchored ``uk_postcode_regex`` instance
    # attribute only matches a string that is nothing but a postcode.
    _POSTCODE_IN_TEXT_REGEX = r'\b[A-Z]{1,2}[0-9][A-Z0-9]? ?[0-9][A-Z]{2}\b'

    # Country terms matched as whole words so that "UK" is not found inside
    # unrelated words such as "DUKE" or "MILWAUKEE".
    _UK_TERMS_REGEX = (
        r'\b(?:UK|UNITED KINGDOM|ENGLAND|SCOTLAND|WALES|NORTHERN IRELAND)\b'
    )

    def __init__(self, config=None):
        """
        Set up UK-specific required fields, VAT rates and regex patterns.

        Args:
            config: Optional configuration passed to BaseValidator. The key
                'uk_vat_rates' overrides the default rates [20, 5, 0].
        """
        super().__init__(config)

        # UK-specific required fields
        uk_required = {
            'vendor.vat_number': 'important',  # UK VAT number (not always critical, but important)
            'amounts.vat': 'critical',  # VAT amount must be present
            'amounts.vat_rate': 'important'  # VAT rate is important but not always critical
        }

        # Update required fields with UK-specific ones
        self.required_fields.update(uk_required)

        # UK VAT rates (percentages)
        self.uk_vat_rates = self.config.get('uk_vat_rates', [20, 5, 0])

        # UK VAT number regex
        self.uk_vat_regex = r'^GB\d{9}$|^GB\d{12}$'

        # UK sort code regex
        self.uk_sort_code_regex = r'^\d{2}-\d{2}-\d{2}$'

        # UK account number regex
        self.uk_account_number_regex = r'^\d{8}$'

        # UK postcode regex (anchored: matches a standalone postcode only)
        self.uk_postcode_regex = r'^[A-Z]{1,2}[0-9][A-Z0-9]? ?[0-9][A-Z]{2}$'

    def _entity_specific_validation(self, extraction_result: Dict[str, Any],
                                    result: ValidationResult) -> None:
        """
        Perform UK-specific validation.

        Args:
            extraction_result: Dictionary of extracted invoice fields
            result: ValidationResult to add issues to
        """
        # 1. VAT number format validation
        self._validate_uk_vat_number(extraction_result, result)

        # 2. Address verification (check for UK postal code)
        self._validate_uk_address(extraction_result, result)

        # 3. VAT rate validation
        self._validate_vat_rate(extraction_result, result)

        # 4. Banking details validation
        self._validate_uk_banking_details(extraction_result, result)

        # 5. Validate currency is GBP
        self._validate_uk_currency(extraction_result, result)

        # 6. Additional UK-specific calculations/validations
        self._validate_vat_calculation(extraction_result, result)

    def _validate_uk_vat_number(self, extraction_result: Dict[str, Any],
                                result: ValidationResult) -> None:
        """
        Validate UK VAT number format.

        Args:
            extraction_result: Dictionary of extracted invoice fields
            result: ValidationResult to add issues to
        """
        vat_number = self._get_nested_field(extraction_result, 'vendor.vat_number')
        if not vat_number:
            return

        # Remove spaces and other non-alphanumeric characters. str() guards
        # against extractors that deliver the value as a non-string.
        vat_number = re.sub(r'[^a-zA-Z0-9]', '', str(vat_number))

        # Check if format matches UK VAT number
        if not re.match(self.uk_vat_regex, vat_number, re.IGNORECASE):
            result.add_issue(
                'vendor.vat_number',
                f"Invalid UK VAT number format: {vat_number}. Should be GB followed by 9 or 12 digits.",
                'warning'
            )

        # Ensure it starts with GB
        if not vat_number.upper().startswith('GB'):
            result.add_issue(
                'vendor.vat_number',
                f"UK VAT number must start with 'GB', got: {vat_number}",
                'warning'
            )

    def _validate_uk_address(self, extraction_result: Dict[str, Any],
                             result: ValidationResult) -> None:
        """
        Validate UK address (checks for UK postal code pattern).

        Args:
            extraction_result: Dictionary of extracted invoice fields
            result: ValidationResult to add issues to
        """
        address = self._get_nested_field(extraction_result, 'vendor.address')
        if not address:
            return

        normalized = str(address).upper()

        # Search anywhere in the address; an anchored pattern would only
        # match when the address consists of nothing but the postcode.
        postcodes = re.findall(self._POSTCODE_IN_TEXT_REGEX, normalized)
        if postcodes:
            return

        result.add_issue(
            'vendor.address',
            "No valid UK postcode found in address",
            'warning'
        )

        # Without a postcode, fall back to UK-specific country terms
        if not re.search(self._UK_TERMS_REGEX, normalized):
            result.add_issue(
                'vendor.address',
                "Address does not appear to be from the UK",
                'warning'
            )

    def _validate_vat_rate(self, extraction_result: Dict[str, Any],
                           result: ValidationResult) -> None:
        """
        Validate VAT rate against known UK rates.

        Args:
            extraction_result: Dictionary of extracted invoice fields
            result: ValidationResult to add issues to
        """
        vat_rate = self._get_nested_field(extraction_result, 'amounts.vat_rate')
        if vat_rate is None:
            return

        try:
            vat_rate_float = float(vat_rate)

            # With no configured rates there is nothing to compare against;
            # min() on an empty sequence would otherwise raise ValueError and
            # be misreported as a badly formatted rate.
            if not self.uk_vat_rates:
                return

            # Find the configured rate nearest to the extracted one
            closest_rate = min(self.uk_vat_rates,
                               key=lambda rate: abs(rate - vat_rate_float))

            # If not close to a standard rate, flag it
            if abs(closest_rate - vat_rate_float) > 1.0:  # Allow a margin of 1 percentage point
                result.add_issue(
                    'amounts.vat_rate',
                    f"Unusual VAT rate: {vat_rate}%. UK standard rates are: {', '.join(map(str, self.uk_vat_rates))}%",
                    'warning'
                )
        except (ValueError, TypeError):
            result.add_issue(
                'amounts.vat_rate',
                f"Invalid VAT rate format: {vat_rate}",
                'warning'
            )

    def _validate_uk_banking_details(self, extraction_result: Dict[str, Any],
                                     result: ValidationResult) -> None:
        """
        Validate UK banking details.

        Args:
            extraction_result: Dictionary of extracted invoice fields
            result: ValidationResult to add issues to
        """
        sort_code = self._get_nested_field(extraction_result, 'payment.sort_code')
        account_number = self._get_nested_field(extraction_result, 'payment.account_number')

        # Sort code validation
        if sort_code:
            # Keep only digits and dashes; str() tolerates numeric input
            clean_sort_code = re.sub(r'[^0-9-]', '', str(sort_code))

            # If no dashes, try to format it
            if '-' not in clean_sort_code and len(clean_sort_code) == 6:
                clean_sort_code = f"{clean_sort_code[0:2]}-{clean_sort_code[2:4]}-{clean_sort_code[4:6]}"

            # Check format
            if not re.match(self.uk_sort_code_regex, clean_sort_code):
                result.add_issue(
                    'payment.sort_code',
                    f"Invalid UK sort code format: {sort_code}. Should be XX-XX-XX.",
                    'warning'
                )

        # Account number validation
        if account_number:
            # Keep only digits; str() tolerates numeric input
            clean_account = re.sub(r'[^0-9]', '', str(account_number))

            # Check against the configured account number pattern
            if not re.match(self.uk_account_number_regex, clean_account):
                result.add_issue(
                    'payment.account_number',
                    f"Invalid UK account number length: {clean_account}. Should be 8 digits.",
                    'warning'
                )

    def _validate_uk_currency(self, extraction_result: Dict[str, Any],
                              result: ValidationResult) -> None:
        """
        Validate that the currency is GBP for UK invoices.

        Args:
            extraction_result: Dictionary of extracted invoice fields
            result: ValidationResult to add issues to
        """
        currency = self._get_nested_field(extraction_result, 'amounts.currency')
        if not currency:
            return

        # Normalize currency code
        currency = str(currency).upper().strip()

        # Check if it's a UK currency code
        uk_currencies = ['GBP', '£', 'POUND', 'POUNDS', 'STERLING']
        if not any(uk_curr in currency for uk_curr in uk_currencies):
            result.add_issue(
                'amounts.currency',
                f"Non-UK currency detected: {currency}. Expected GBP for UK invoice.",
                'warning'
            )

    def _validate_vat_calculation(self, extraction_result: Dict[str, Any],
                                  result: ValidationResult) -> None:
        """
        Validate that the VAT calculation is consistent with the VAT rate.

        Args:
            extraction_result: Dictionary of extracted invoice fields
            result: ValidationResult to add issues to
        """
        subtotal = self._get_nested_field(extraction_result, 'amounts.subtotal')
        vat = self._get_nested_field(extraction_result, 'amounts.vat')
        vat_rate = self._get_nested_field(extraction_result, 'amounts.vat_rate')

        if subtotal is None or vat is None or vat_rate is None:
            return

        try:
            subtotal_float = float(subtotal)
            vat_float = float(vat)
            vat_rate_percent = float(vat_rate)
        except (ValueError, TypeError):
            # Unparseable amounts are not reported here; the VAT rate format
            # is reported by _validate_vat_rate.
            return

        # Calculate expected VAT (rate is a percentage)
        expected_vat = subtotal_float * (vat_rate_percent / 100)

        # 1p or 1% tolerance; abs() keeps the 1% band for negative subtotals
        # (e.g. credit notes), where it would otherwise collapse to 1p.
        tolerance = max(0.01, abs(subtotal_float) * 0.01)

        if abs(expected_vat - vat_float) > tolerance:
            # Report the parsed percentage directly to avoid float artifacts
            # from converting to a fraction and back.
            result.add_issue(
                'amounts.vat',
                f"VAT amount ({vat_float}) doesn't match the calculated VAT " +
                f"({expected_vat:.2f}) based on subtotal ({subtotal_float}) " +
                f"and VAT rate ({vat_rate_percent}%)",
                'warning'
            )
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| bases | BaseValidator | - | |
Parameter Details
config: Optional configuration dictionary that can contain 'uk_vat_rates' (list of valid UK VAT rates, defaults to [20, 5, 0]) and other configuration options inherited from BaseValidator. If None, default configuration is used.
Return Value
Instantiation returns a UKValidator object configured with UK-specific validation rules. The main validation method (_entity_specific_validation) does not return a value but modifies the ValidationResult object passed to it by adding validation issues. Individual validation methods also modify the ValidationResult object in place.
Class Interface
Methods
__init__(self, config=None)
Purpose: Initialize the UKValidator with UK-specific validation rules and configuration
Parameters:
config: Optional dictionary containing configuration options including 'uk_vat_rates' (defaults to [20, 5, 0])
Returns: None (constructor)
_entity_specific_validation(self, extraction_result: Dict[str, Any], result: ValidationResult) -> None
Purpose: Main validation method that orchestrates all UK-specific validation checks
Parameters:
extraction_result: Dictionary containing extracted invoice fields with nested structure
result: ValidationResult object to which validation issues are added
Returns: None (modifies result object in place)
_validate_uk_vat_number(self, extraction_result: Dict[str, Any], result: ValidationResult) -> None
Purpose: Validate UK VAT number format (GB followed by 9 or 12 digits)
Parameters:
extraction_result: Dictionary containing extracted invoice fields
result: ValidationResult object to add issues to
Returns: None (modifies result object in place)
_validate_uk_address(self, extraction_result: Dict[str, Any], result: ValidationResult) -> None
Purpose: Validate UK address by checking for valid UK postcode pattern and UK-specific terms
Parameters:
extraction_result: Dictionary containing extracted invoice fields
result: ValidationResult object to add issues to
Returns: None (modifies result object in place)
_validate_vat_rate(self, extraction_result: Dict[str, Any], result: ValidationResult) -> None
Purpose: Validate VAT rate against known UK standard rates (20%, 5%, 0%), allowing a margin of 1 percentage point
Parameters:
extraction_result: Dictionary containing extracted invoice fields
result: ValidationResult object to add issues to
Returns: None (modifies result object in place)
_validate_uk_banking_details(self, extraction_result: Dict[str, Any], result: ValidationResult) -> None
Purpose: Validate UK banking details including sort code (XX-XX-XX format) and account number (8 digits)
Parameters:
extraction_result: Dictionary containing extracted invoice fields
result: ValidationResult object to add issues to
Returns: None (modifies result object in place)
_validate_uk_currency(self, extraction_result: Dict[str, Any], result: ValidationResult) -> None
Purpose: Validate that the currency is GBP or other UK currency representation
Parameters:
extraction_result: Dictionary containing extracted invoice fields
result: ValidationResult object to add issues to
Returns: None (modifies result object in place)
_validate_vat_calculation(self, extraction_result: Dict[str, Any], result: ValidationResult) -> None
Purpose: Validate that VAT amount matches calculated VAT based on subtotal and VAT rate with tolerance
Parameters:
extraction_result: Dictionary containing extracted invoice fields
result: ValidationResult object to add issues to
Returns: None (modifies result object in place)
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
| required_fields | Dict[str, str] | Dictionary mapping field paths to their importance level ('critical' or 'important'), inherited from BaseValidator and updated with UK-specific fields | instance |
| uk_vat_rates | List[int] | List of valid UK VAT rates in percentages, defaults to [20, 5, 0] | instance |
| uk_vat_regex | str | Regular expression pattern for validating UK VAT numbers (GB followed by 9 or 12 digits) | instance |
| uk_sort_code_regex | str | Regular expression pattern for validating UK sort codes (XX-XX-XX format) | instance |
| uk_account_number_regex | str | Regular expression pattern for validating UK account numbers (8 digits) | instance |
| uk_postcode_regex | str | Regular expression pattern for validating UK postcodes | instance |
| config | Dict[str, Any] | Configuration dictionary inherited from BaseValidator containing validation settings | instance |
Dependencies
re, logging, typing, datetime
Required Imports
import re
import logging
from typing import Dict, Any
from datetime import datetime
from validators.base_validator import BaseValidator, ValidationResult
Usage Example
from validators.uk_validator import UKValidator
from validators.base_validator import ValidationResult

# Instantiate with default configuration (VAT rates default to [20, 5, 0])
validator = UKValidator()

# Or with custom VAT rates (values are percentages)
config = {'uk_vat_rates': [20, 5, 0]}
validator = UKValidator(config=config)

# Prepare extraction result from invoice.
# Fields are looked up by dotted path, e.g. 'vendor.vat_number'.
extraction_result = {
    'vendor': {
        'vat_number': 'GB123456789',
        'address': '123 High Street, London, SW1A 1AA, UK'
    },
    'amounts': {
        'subtotal': 100.00,
        'vat': 20.00,
        'vat_rate': 20,
        'total': 120.00,
        'currency': 'GBP'
    },
    'payment': {
        'sort_code': '12-34-56',
        'account_number': '12345678'
    }
}

# Create validation result object that will collect the issues
result = ValidationResult()

# Perform validation (calls the UK-specific hook directly; it returns None
# and records findings on `result`)
validator._entity_specific_validation(extraction_result, result)

# Check validation results
# NOTE(review): `is_valid`, `issues` and the issue attributes used below are
# defined by ValidationResult in base_validator, not shown here; confirm.
if result.is_valid:
    print('Invoice is valid')
else:
    for issue in result.issues:
        print(f'{issue.severity}: {issue.field} - {issue.message}')
Best Practices
- Always instantiate UKValidator before calling validation methods
- Pass a ValidationResult object to collect validation issues rather than expecting return values
- The _entity_specific_validation method is the main entry point and should be called from the parent validate() method
- All validation methods are protected (prefixed with _) and designed to be called internally
- Validation methods modify the ValidationResult object in place by calling result.add_issue()
- The class maintains state through instance attributes (uk_vat_rates, regex patterns) set during initialization
- VAT calculations use tolerance thresholds (1p or 1% of subtotal) to account for rounding differences
- All validation issues are added as 'warning' severity, not 'error', allowing processing to continue
- The class expects nested dictionary structure for extraction_result (e.g., 'vendor.vat_number')
- Use _get_nested_field() method inherited from BaseValidator to safely access nested fields
- Currency validation accepts multiple UK currency representations: GBP, £, POUND, POUNDS, STERLING
- Banking details validation attempts to auto-format sort codes if dashes are missing
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
-
class TestUKValidator 85.7% similar
-
class BEValidator 76.7% similar
-
class BaseValidator 76.3% similar
-
class AUValidator 74.9% similar
-
class TestUKExtractor 71.7% similar