class BEValidator
Belgium-specific invoice data validator that extends BaseValidator to implement Belgian invoice validation rules including VAT number format, address verification, IBAN validation, and legal requirements.
/tf/active/vicechatdev/invoice_extraction/validators/be_validator.py
10 - 385
complex
Purpose
This class validates invoice data extracted from Belgian invoices according to Belgian tax and legal requirements. It checks VAT number formats (BE0XXX.XXX.XXX), validates Belgian postal codes and addresses, ensures VAT rates match standard Belgian rates (21%, 12%, 6%, 0%), validates Belgian IBAN formats, verifies EUR currency usage, checks VAT calculations, and ensures compliance with Belgian legal invoice requirements such as mandatory fields and VAT regime indications.
Source Code
class BEValidator(BaseValidator):
    """
    Belgium-specific invoice data validator.

    Implements validation rules specific to Belgian invoices:
    - Belgian VAT number format validation
    - Belgian address verification
    - EU VAT rate consistency
    - IBAN format validation
    - Belgian invoice requirements

    Each check reads its inputs with ``self._get_nested_field`` and reports
    findings through ``result.add_issue(field, message, severity)``; the
    checks themselves return nothing.
    """

    # Upper-case country/city names whose presence marks an address as Belgian.
    _BE_ADDRESS_TERMS = (
        'BELGIUM', 'BELGIQUE', 'BELGIË', 'BELGIE',
        'BRUXELLES', 'BRUSSEL', 'BRUSSELS',
        'ANTWERPEN', 'ANVERS', 'GENT', 'GAND', 'LIÈGE', 'LUIK',
    )

    # Substrings (upper-case) that identify a currency value as Euro.
    _EURO_MARKERS = ('EUR', '€', 'EURO', 'EUROS')

    def __init__(self, config=None):
        """
        Set up the Belgian rules on top of the base validator.

        Args:
            config: Optional configuration passed to BaseValidator. The key
                'be_vat_rates' overrides the accepted VAT rates
                (default [21, 12, 6, 0]).
        """
        super().__init__(config)

        # Belgian-specific required fields
        be_required = {
            'vendor.vat_number': 'critical',   # Belgian VAT number is required
            'amounts.vat': 'critical',         # VAT amount must be present
            'amounts.vat_rate': 'important',   # VAT rate is important but not always critical
            'payment.iban': 'important',       # IBAN is important for Belgian invoices
        }

        # Update required fields with Belgian-specific ones
        self.required_fields.update(be_required)

        # Belgian VAT rates
        self.be_vat_rates = self.config.get('be_vat_rates', [21, 12, 6, 0])

        # Belgian VAT number regex (BE0XXX.XXX.XXX or variants). The optional
        # leading digit may be 0 or 1: enterprise numbers starting with 1 are
        # valid as well, and the legacy 9-digit form omits it entirely.
        self.be_vat_regex = r'^BE\s*[01]?\d{3}[.\s]?\d{3}[.\s]?\d{3}$'

        # IBAN regex (Belgian IBANs start with BE); tolerates optional
        # whitespace between the groups of four digits.
        self.be_iban_regex = r'^BE\d{2}(?:\s*\d{4}){3}$'

        # Belgian postal code regex (any standalone 4-digit number 1000-9999)
        self.be_postal_code_regex = r'\b[1-9]\d{3}\b'

    def _entity_specific_validation(self, extraction_result: Dict[str, Any],
                                    result: ValidationResult) -> None:
        """
        Perform Belgium-specific validation.

        Runs every Belgian check in a fixed order; each one appends its
        findings to ``result``.

        Args:
            extraction_result: Dictionary of extracted invoice fields
            result: ValidationResult to add issues to
        """
        # 1. VAT number format validation
        self._validate_be_vat_number(extraction_result, result)

        # 2. Address verification (check for Belgian postal codes)
        self._validate_be_address(extraction_result, result)

        # 3. VAT rate validation
        self._validate_vat_rate(extraction_result, result)

        # 4. IBAN validation
        self._validate_be_iban(extraction_result, result)

        # 5. Validate currency is EUR
        self._validate_be_currency(extraction_result, result)

        # 6. Additional Belgian-specific calculations/validations
        self._validate_vat_calculation(extraction_result, result)

        # 7. Belgian legal requirements
        self._validate_be_legal_requirements(extraction_result, result)

    def _validate_be_vat_number(self, extraction_result: Dict[str, Any],
                                result: ValidationResult) -> None:
        """
        Validate Belgian VAT number format.

        A value that does not match ``self.be_vat_regex`` is reported as an
        'error'. A value that matches but does not carry the full 10 digits
        (legacy 9-digit notation without the leading 0) is reported as a
        'warning'. A missing/empty value is ignored here.

        Args:
            extraction_result: Dictionary of extracted invoice fields
            result: ValidationResult to add issues to
        """
        vat_number = self._get_nested_field(extraction_result, 'vendor.vat_number')
        if not vat_number:
            return

        # Standardize format for comparison; str() keeps a numeric extraction
        # result from crashing on .upper().
        clean_vat = str(vat_number).upper().strip()

        # The pattern is anchored on "BE", so a match also guarantees the
        # country prefix; no separate prefix check is needed afterwards.
        if not re.match(self.be_vat_regex, clean_vat, re.IGNORECASE):
            result.add_issue(
                'vendor.vat_number',
                f"Invalid Belgian VAT number format: {vat_number}. " +
                "Should be BE0XXX.XXX.XXX or similar.",
                'error'
            )
            return

        # Extract ASCII digits for the length checks
        digits = re.sub(r'[^0-9]', '', clean_vat)

        # The pattern only admits 10 digits when the first one is 0 or 1, so
        # anything other than 10 digits means the leading digit is missing.
        if len(digits) != 10:
            result.add_issue(
                'vendor.vat_number',
                f"Belgian VAT number should have a leading 0 after 'BE', got: {vat_number}",
                'warning'
            )

        # Belgian VAT should have 9 or 10 digits after BE. This can only
        # trigger when the regex matched non-ASCII digit characters.
        if not (9 <= len(digits) <= 10):
            result.add_issue(
                'vendor.vat_number',
                f"Belgian VAT number should have 9 or 10 digits after 'BE', got: {len(digits)}",
                'warning'
            )

    def _validate_be_address(self, extraction_result: Dict[str, Any],
                             result: ValidationResult) -> None:
        """
        Validate Belgian address (checks for Belgian postal code pattern).

        Adds a warning when no 4-digit postal code is found, and a second
        warning when, in addition, none of the known Belgian country/city
        names occurs in the address.

        Args:
            extraction_result: Dictionary of extracted invoice fields
            result: ValidationResult to add issues to
        """
        address = self._get_nested_field(extraction_result, 'vendor.address')
        if not address:
            return

        # str() lets a non-string address value be searched instead of
        # raising inside re.findall / .upper().
        address_text = str(address)

        # Check if address contains a Belgian postal code (1000-9999)
        postcodes = re.findall(self.be_postal_code_regex, address_text)
        if postcodes:
            return

        result.add_issue(
            'vendor.address',
            "No valid Belgian postal code found in address",
            'warning'
        )

        # Check for Belgian-specific terms
        upper_address = address_text.upper()
        has_be_term = any(term in upper_address for term in self._BE_ADDRESS_TERMS)
        if not has_be_term:
            result.add_issue(
                'vendor.address',
                "Address does not appear to be from Belgium",
                'warning'
            )

    def _validate_vat_rate(self, extraction_result: Dict[str, Any],
                           result: ValidationResult) -> None:
        """
        Validate VAT rate against known Belgian rates.

        The rate is compared with the nearest entry of ``self.be_vat_rates``;
        a distance of more than one percentage point yields a warning, as
        does a value that cannot be converted to float.

        Args:
            extraction_result: Dictionary of extracted invoice fields
            result: ValidationResult to add issues to
        """
        vat_rate = self._get_nested_field(extraction_result, 'amounts.vat_rate')
        if vat_rate is None:
            return

        try:
            vat_rate_float = float(vat_rate)

            # Check if VAT rate is one of the standard Belgian rates
            closest_rate = min(self.be_vat_rates, key=lambda x: abs(x - vat_rate_float))

            # If not close to a standard rate, flag it
            if abs(closest_rate - vat_rate_float) > 1.0:  # Allow 1% margin of error
                result.add_issue(
                    'amounts.vat_rate',
                    f"Unusual VAT rate: {vat_rate}%. Belgian standard rates are: " +
                    f"{', '.join(map(str, self.be_vat_rates))}%",
                    'warning'
                )
        except (ValueError, TypeError):
            result.add_issue(
                'amounts.vat_rate',
                f"Invalid VAT rate format: {vat_rate}",
                'warning'
            )

    def _validate_be_iban(self, extraction_result: Dict[str, Any],
                          result: ValidationResult) -> None:
        """
        Validate Belgian IBAN format.

        After removing whitespace the IBAN is checked for the 'BE' prefix,
        a length of 16, the ``self.be_iban_regex`` layout, and the mod-97
        checksum. Every failed check adds its own warning. The checksum is
        only computed for 16 ASCII letters/digits, so stray punctuation is
        reported as a format problem instead of raising.

        Args:
            extraction_result: Dictionary of extracted invoice fields
            result: ValidationResult to add issues to
        """
        iban = self._get_nested_field(extraction_result, 'payment.iban')
        if not iban:
            return

        # Remove spaces for standardization
        clean_iban = re.sub(r'\s+', '', str(iban).upper())

        # Check if it's a Belgian IBAN
        if not clean_iban.startswith('BE'):
            result.add_issue(
                'payment.iban',
                f"IBAN does not appear to be Belgian (should start with BE): {iban}",
                'warning'
            )

        # Check length (Belgian IBAN is 16 characters: BE + 2 check digits + 12 account digits)
        if len(clean_iban) != 16:
            result.add_issue(
                'payment.iban',
                f"Invalid Belgian IBAN length: {len(clean_iban)}. Should be 16 characters.",
                'warning'
            )

        # Check format using the shared pattern from __init__
        if not re.match(self.be_iban_regex, clean_iban):
            result.add_issue(
                'payment.iban',
                f"Invalid Belgian IBAN format: {iban}",
                'warning'
            )

        # Checksum validation needs the full 16 characters, and every one of
        # them must be convertible to a number (0-9, A-Z).
        if re.fullmatch(r'[A-Z0-9]{16}', clean_iban):
            # Move first 4 chars to the end
            rearranged = clean_iban[4:] + clean_iban[:4]

            # Convert letters to numbers (A=10, B=11, ...); base-36 parsing
            # yields exactly that mapping and leaves digits unchanged.
            numeric = ''.join(str(int(char, 36)) for char in rearranged)

            # A valid IBAN leaves remainder 1 when divided by 97
            if int(numeric) % 97 != 1:
                result.add_issue(
                    'payment.iban',
                    f"IBAN checksum validation failed for: {iban}",
                    'warning'
                )

    def _validate_be_currency(self, extraction_result: Dict[str, Any],
                              result: ValidationResult) -> None:
        """
        Validate that the currency is EUR for Belgian invoices.

        A warning is added when the upper-cased currency value contains none
        of the Euro markers ('EUR', '€', 'EURO', 'EUROS').

        Args:
            extraction_result: Dictionary of extracted invoice fields
            result: ValidationResult to add issues to
        """
        currency = self._get_nested_field(extraction_result, 'amounts.currency')
        if not currency:
            return

        # Normalize currency code
        currency = str(currency).upper().strip()

        # Check if it's Euro
        if not any(marker in currency for marker in self._EURO_MARKERS):
            result.add_issue(
                'amounts.currency',
                f"Non-Euro currency detected: {currency}. Expected EUR for Belgian invoice.",
                'warning'
            )

    def _validate_vat_calculation(self, extraction_result: Dict[str, Any],
                                  result: ValidationResult) -> None:
        """
        Validate that the VAT calculation is consistent with the VAT rate.

        Compares the stated VAT with ``subtotal * vat_rate / 100`` and warns
        when they differ by more than the larger of 2 cents and 1.5% of the
        subtotal's magnitude. Skipped when any of the three values is missing
        or not numeric.

        Args:
            extraction_result: Dictionary of extracted invoice fields
            result: ValidationResult to add issues to
        """
        subtotal = self._get_nested_field(extraction_result, 'amounts.subtotal')
        vat = self._get_nested_field(extraction_result, 'amounts.vat')
        vat_rate = self._get_nested_field(extraction_result, 'amounts.vat_rate')

        if subtotal is None or vat is None or vat_rate is None:
            return

        try:
            subtotal_float = float(subtotal)
            vat_float = float(vat)
            vat_rate_float = float(vat_rate) / 100  # Convert percentage to decimal
        except (ValueError, TypeError):
            # Already handled by type validation
            return

        # Calculate expected VAT
        expected_vat = subtotal_float * vat_rate_float

        # 2 cents or 1.5% tolerance. abs() keeps the relative part meaningful
        # for negative subtotals (credit notes); without it the tolerance
        # would silently collapse to the 2-cent floor.
        tolerance = max(0.02, abs(subtotal_float) * 0.015)

        if abs(expected_vat - vat_float) > tolerance:
            result.add_issue(
                'amounts.vat',
                f"VAT amount ({vat_float}) doesn't match the calculated VAT " +
                f"({expected_vat:.2f}) based on subtotal ({subtotal_float}) " +
                f"and VAT rate ({vat_rate_float*100}%)",
                'warning'
            )

    def _validate_be_legal_requirements(self, extraction_result: Dict[str, Any],
                                        result: ValidationResult) -> None:
        """
        Validate Belgian legal invoice requirements.

        Missing issue date, invoice number, VAT number, or vendor
        name/address are reported as 'error'. A 0% VAT rate without a VAT
        regime indication and missing payment information (neither 'iban'
        nor 'payment_terms') are reported as 'warning'.

        Args:
            extraction_result: Dictionary of extracted invoice fields
            result: ValidationResult to add issues to
        """
        # 1. Check for invoice date (mandatory in Belgium)
        issue_date = self._get_nested_field(extraction_result, 'invoice.issue_date')
        if not issue_date:
            result.add_issue(
                'invoice.issue_date',
                "Invoice date is mandatory for Belgian invoices",
                'error'
            )

        # 2. Check for consecutive invoice number (mandatory in Belgium)
        invoice_number = self._get_nested_field(extraction_result, 'invoice.number')
        if not invoice_number:
            result.add_issue(
                'invoice.number',
                "Invoice number is mandatory for Belgian invoices",
                'error'
            )

        # 3. Check for VAT number (mandatory for VAT-registered businesses)
        vat_number = self._get_nested_field(extraction_result, 'vendor.vat_number')
        if not vat_number:
            result.add_issue(
                'vendor.vat_number',
                "VAT number is mandatory for Belgian invoices from VAT-registered businesses",
                'error'
            )

        # 4. Check for vendor name and address (mandatory)
        vendor_name = self._get_nested_field(extraction_result, 'vendor.name')
        vendor_address = self._get_nested_field(extraction_result, 'vendor.address')
        if not vendor_name or not vendor_address:
            result.add_issue(
                'vendor',
                "Vendor name and address are mandatory for Belgian invoices",
                'error'
            )

        # 5. Check for VAT regime indication
        vat_regime = self._get_nested_field(extraction_result, 'amounts.vat_regime')
        vat_rate = self._get_nested_field(extraction_result, 'amounts.vat_rate')
        # Compare numerically so that a rate extracted as "0" or "0.0" is
        # recognised, in line with the float() coercion of the other checks.
        try:
            is_zero_rated = vat_rate is not None and float(vat_rate) == 0
        except (ValueError, TypeError):
            is_zero_rated = False
        if is_zero_rated and not vat_regime:
            result.add_issue(
                'amounts.vat_regime',
                "For 0% VAT, a VAT regime indication is required (exemption reason, reverse charge, etc.)",
                'warning'
            )

        # 6. Check for payment information
        payment_info = self._get_nested_field(extraction_result, 'payment')
        # Only a dict can carry the 'iban' / 'payment_terms' keys; any other
        # value is treated as missing payment information rather than
        # raising on .get().
        has_payment_details = isinstance(payment_info, dict) and any(
            payment_info.get(key) for key in ('iban', 'payment_terms')
        )
        if not has_payment_details:
            result.add_issue(
                'payment',
                "Payment information is typically required on Belgian invoices",
                'warning'
            )
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| bases | BaseValidator | - | |
Parameter Details
config: Optional configuration dictionary that can override default settings. Can include 'be_vat_rates' key to specify custom Belgian VAT rates (defaults to [21, 12, 6, 0]). Also inherits configuration options from BaseValidator parent class.
Return Value
Instantiation returns a BEValidator object configured for Belgian invoice validation. The main validation method (_entity_specific_validation) does not return a value but modifies the ValidationResult object passed to it by adding validation issues. Each validation sub-method also modifies the ValidationResult object in place.
Class Interface
Methods
__init__(self, config=None)
Purpose: Initialize the BEValidator with Belgian-specific validation rules and configuration
Parameters:
config: Optional dictionary containing configuration overrides, particularly 'be_vat_rates' for custom VAT rates
Returns: None (constructor)
_entity_specific_validation(self, extraction_result: Dict[str, Any], result: ValidationResult) -> None
Purpose: Main validation orchestrator that calls all Belgian-specific validation methods in sequence
Parameters:
extraction_result: Dictionary containing extracted invoice fields with nested structure
result: ValidationResult object to accumulate validation issues
Returns: None (modifies result object in place)
_validate_be_vat_number(self, extraction_result: Dict[str, Any], result: ValidationResult) -> None
Purpose: Validate Belgian VAT number format (BE0XXX.XXX.XXX), structure, and digit requirements
Parameters:
extraction_result: Dictionary containing extracted invoice fields
result: ValidationResult object to add VAT number validation issues
Returns: None (modifies result object in place)
_validate_be_address(self, extraction_result: Dict[str, Any], result: ValidationResult) -> None
Purpose: Validate Belgian address by checking for Belgian postal codes (1000-9999) and Belgian location terms
Parameters:
extraction_result: Dictionary containing extracted invoice fields
result: ValidationResult object to add address validation issues
Returns: None (modifies result object in place)
_validate_vat_rate(self, extraction_result: Dict[str, Any], result: ValidationResult) -> None
Purpose: Validate that VAT rate matches one of the standard Belgian rates (21%, 12%, 6%, 0%) within 1% tolerance
Parameters:
extraction_result: Dictionary containing extracted invoice fields
result: ValidationResult object to add VAT rate validation issues
Returns: None (modifies result object in place)
_validate_be_iban(self, extraction_result: Dict[str, Any], result: ValidationResult) -> None
Purpose: Validate Belgian IBAN format, length (16 characters), and checksum using mod-97 algorithm
Parameters:
extraction_result: Dictionary containing extracted invoice fields
result: ValidationResult object to add IBAN validation issues
Returns: None (modifies result object in place)
_validate_be_currency(self, extraction_result: Dict[str, Any], result: ValidationResult) -> None
Purpose: Validate that the invoice currency is EUR (Euro) as expected for Belgian invoices
Parameters:
extraction_result: Dictionary containing extracted invoice fields
result: ValidationResult object to add currency validation issues
Returns: None (modifies result object in place)
_validate_vat_calculation(self, extraction_result: Dict[str, Any], result: ValidationResult) -> None
Purpose: Validate that VAT amount matches calculated VAT (subtotal * rate) within tolerance (2 cents or 1.5%)
Parameters:
extraction_result: Dictionary containing extracted invoice fields
result: ValidationResult object to add VAT calculation validation issues
Returns: None (modifies result object in place)
_validate_be_legal_requirements(self, extraction_result: Dict[str, Any], result: ValidationResult) -> None
Purpose: Validate Belgian legal invoice requirements including mandatory fields (date, number, VAT number, vendor info, payment info) and VAT regime indications
Parameters:
extraction_result: Dictionary containing extracted invoice fields
result: ValidationResult object to add legal requirement validation issues
Returns: None (modifies result object in place)
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
| required_fields | Dict[str, str] | Dictionary mapping field paths to severity levels ('critical', 'important') for required fields, inherited from BaseValidator and extended with Belgian-specific requirements | instance |
| be_vat_rates | List[int] | List of standard Belgian VAT rates in percentages, defaults to [21, 12, 6, 0] | instance |
| be_vat_regex | str | Regular expression pattern for validating Belgian VAT number format (BE0XXX.XXX.XXX or variants) | instance |
| be_iban_regex | str | Regular expression pattern for validating Belgian IBAN format (BE followed by digits) | instance |
| be_postal_code_regex | str | Regular expression pattern for validating Belgian postal codes (1000-9999) | instance |
| config | Dict[str, Any] | Configuration dictionary inherited from BaseValidator, contains validation settings and overrides | instance |
Dependencies
re, logging, typing, datetime
Required Imports
import re
import logging
from typing import Dict, Any
from datetime import datetime
from validators.base_validator import BaseValidator, ValidationResult
Usage Example
from validators.be_validator import BEValidator
from validators.base_validator import ValidationResult

# Initialize validator with optional config
config = {'be_vat_rates': [21, 12, 6, 0]}
validator = BEValidator(config=config)

# Prepare extraction result from invoice
extraction_result = {
    'vendor': {
        'vat_number': 'BE0123.456.789',
        'name': 'Example BVBA',
        'address': '1000 Brussels, Belgium'
    },
    'amounts': {
        'subtotal': 100.00,
        'vat': 21.00,
        'vat_rate': 21,
        'currency': 'EUR'
    },
    'payment': {
        'iban': 'BE68 5390 0754 7034'
    },
    'invoice': {
        'number': 'INV-2024-001',
        'issue_date': '2024-01-15'
    }
}

# Create validation result object
result = ValidationResult()

# Perform validation (issues are appended to `result` in place)
validator._entity_specific_validation(extraction_result, result)

# Check validation results
if result.is_valid:
    print('Invoice is valid')
else:
    for issue in result.issues:
        print(f'{issue.severity}: {issue.field} - {issue.message}')
Best Practices
- Always instantiate with appropriate config if custom VAT rates are needed
- Call _entity_specific_validation with a properly initialized ValidationResult object
- Ensure extraction_result dictionary follows the expected nested structure (vendor.vat_number, amounts.vat, etc.)
- The validator modifies the ValidationResult object in place, so check result.issues after validation
- This class is designed to be called from a parent validation workflow, not standalone
- All validation methods are protected (_method_name) indicating they are internal to the validation process
- The validator uses tolerance margins for VAT calculations (2 cents or 1.5%) to account for rounding
- Belgian VAT numbers must start with 'BE' followed by a leading 0 and 9-10 digits total
- Belgian IBANs are exactly 16 characters: BE + 2 check digits + 12 account digits
- The validator performs checksum validation on IBANs using the mod-97 algorithm
- Issues added by this validator use severity 'error' or 'warning'; 'critical' and 'important' are the levels assigned to required fields - handle each appropriately in calling code
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
-
class TestBEValidator 86.5% similar
-
class BEExtractor 77.0% similar
-
class UKValidator 76.7% similar
-
class TestBEExtractor 73.6% similar
-
class BaseValidator 71.7% similar