class TestAUExtractor
Unit test class for testing the AUExtractor class, which extracts data from Australian invoices including ABN, GST, and payment details.
/tf/active/vicechatdev/invoice_extraction/tests/test_extractors.py
477 - 650
moderate
Purpose
This test class validates the functionality of the AUExtractor class by testing Australian invoice data extraction, ABN formatting, and GST handling. It uses mocked LLM clients to simulate invoice processing without requiring actual API calls. The tests verify that Australian-specific fields like ABN (Australian Business Number), GST (Goods and Services Tax), BSB (Bank State Branch), and currency formatting are correctly extracted and formatted.
Source Code
class TestAUExtractor(unittest.TestCase):
    """Tests for AUExtractor driven by canned LLM responses.

    Every test patches ``extractors.au_extractor.LLMClient`` so that
    instantiating it yields a ``MockLLMClient`` preloaded with JSON strings;
    no real LLM call is made.
    """

    @staticmethod
    def _stub_llm(payloads):
        """Return a MockLLMClient whose responses are ``payloads`` as JSON.

        Args:
            payloads: Mapping of prompt key to a JSON-serializable object.
                Iteration order is kept when building the response mapping.
        """
        return MockLLMClient(
            {prompt_key: json.dumps(body) for prompt_key, body in payloads.items()}
        )

    def _extract_with(self, patched_client_cls, llm_stub):
        """Wire ``llm_stub`` into the patched class and run one extraction.

        Args:
            patched_client_cls: The mock standing in for ``LLMClient``.
            llm_stub: Object the patched class returns when called.

        Returns:
            Whatever ``AUExtractor.extract`` returns for the sample document.
        """
        patched_client_cls.return_value = llm_stub
        extractor = AUExtractor(self.config)
        return extractor.extract(self.au_doc, 'en')

    def setUp(self):
        """Prepare config, default mock LLM and sample invoice for each test."""
        llm_settings = {'provider': 'test', 'model': 'test-model'}
        self.config = {'confidence_threshold': 0.7, 'llm': llm_settings}

        # (product index, quantity, GST amount, line amount) for each line item
        item_rows = ((1, 2, 20.00, 220.00), (2, 3, 30.00, 330.00))
        line_items = [
            {
                "description": f"Test Product {index}",
                "quantity": quantity,
                "unit_price": 100.00,
                "gst_amount": gst_amount,
                "amount": amount,
                "gst_applicable": True,
            }
            for index, quantity, gst_amount, amount in item_rows
        ]

        # Default mock LLM client covering every section of the invoice
        self.mock_llm = self._stub_llm({
            'invoice metadata': {
                "number": "INV-2023-789",
                "issue_date": "15/01/2023",
                "due_date": "15/02/2023",
                "po_number": "PO-4567",
                "reference": "REF-8901",
            },
            'vendor data': {
                "name": "Australian Test Pty Ltd",
                "abn": "12 345 678 901",
                "address": "123 Sydney Road, Sydney NSW 2000, Australia",
                "contact": "contact@autestvendor.com.au",
            },
            'amounts': {
                "subtotal": 500.00,
                "total": 550.00,
                "currency": "AUD",
            },
            'tax data': {
                "gst": 50.00,
                "tax_rate": 10,
                "tax_status": "GST inclusive",
            },
            'payment data': {
                "bank_name": "Commonwealth Bank",
                "bsb": "062-000",
                "account_number": "12345678",
                "account_name": "Australian Test Pty Ltd",
                "payment_terms": "30 days",
                "reference": "INV-2023-789",
            },
            'line items': line_items,
        })

        # Sample Australian invoice: the document text is the two page texts
        # joined by a newline.
        header_text = (
            'TAX INVOICE #INV-2023-789\nDate: 15/01/2023\nAustralian Test Pty Ltd\n'
            'ABN: 12 345 678 901\n123 Sydney Road, Sydney NSW 2000, Australia'
        )
        totals_text = 'Subtotal: $500.00\nGST (10%): $50.00\nTotal: $550.00'
        page_texts = (header_text, totals_text)
        self.au_doc = {
            'text': '\n'.join(page_texts),
            'pages': [
                {'text': page_text, 'width': 800, 'height': 1000, 'tables': []}
                for page_text in page_texts
            ],
        }

    @patch('extractors.au_extractor.LLMClient')
    def test_au_extract(self, mock_llm_client):
        """Test extraction of data from Australian invoice."""
        result = self._extract_with(mock_llm_client, self.mock_llm)

        # All top-level sections must be present
        for section in ('invoice', 'vendor', 'amounts', 'payment', 'line_items'):
            self.assertIn(section, result)

        # Spot-check one or two fields per section
        expected_fields = (
            ('invoice', 'number', 'INV-2023-789'),
            ('vendor', 'abn', '12 345 678 901'),
            ('amounts', 'total', 550.00),
            ('amounts', 'gst', 50.00),
            ('payment', 'bsb', '062-000'),
        )
        for section, field, expected in expected_fields:
            self.assertEqual(result[section][field], expected)
        self.assertEqual(len(result['line_items']), 2)

        # A confidence score must be reported as a float
        self.assertIn('confidence', result)
        self.assertIsInstance(result['confidence'], float)

    @patch('extractors.au_extractor.LLMClient')
    def test_au_abn_formatting(self, mock_llm_client):
        """Test that Australian ABN is properly formatted."""
        llm_stub = self._stub_llm({
            'vendor data': {
                "name": "Australian Test Pty Ltd",
                "abn": "12345678901",  # Malformatted (no spaces)
                "address": "123 Sydney Road, Sydney NSW 2000, Australia",
            },
            # Minimum responses for the remaining sections
            'invoice metadata': {"number": "12345", "issue_date": "15/01/2023"},
            'amounts': {"subtotal": 100, "total": 110, "gst": 10, "currency": "AUD"},
            'tax data': {"gst": 10, "tax_rate": 10},
            'line items': [],
        })

        result = self._extract_with(mock_llm_client, llm_stub)

        # The unspaced ABN must come back in spaced form
        self.assertEqual(result['vendor']['abn'], '12 345 678 901')

    @patch('extractors.au_extractor.LLMClient')
    def test_au_gst_handling(self, mock_llm_client):
        """Test that Australian GST is properly handled."""
        llm_stub = self._stub_llm({
            'tax data': {
                "gst": 10.00,
                "tax_rate": 10,
                "tax_status": "GST inclusive",
            },
            # Minimum responses for the remaining sections
            'invoice metadata': {"number": "12345", "issue_date": "15/01/2023"},
            'vendor data': {"name": "Australian Test", "abn": "12 345 678 901"},
            'amounts': {"subtotal": 100, "total": 110, "currency": "AUD"},
            'line items': [],
        })

        result = self._extract_with(mock_llm_client, llm_stub)

        # GST must appear under both 'gst' and 'tax', alongside the rate
        amounts = result['amounts']
        self.assertEqual(amounts['gst'], 10.00)
        self.assertEqual(amounts['tax'], 10.00)
        self.assertEqual(amounts['tax_rate'], 10)
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| bases | unittest.TestCase | - | |
Parameter Details
bases: Inherits from unittest.TestCase to provide testing framework functionality including assertions, test setup/teardown, and test discovery
Return Value
As a test class, it doesn't return values directly. Test methods use assertions to validate behavior. The setUp method initializes test fixtures that are used across test methods. Test methods return None but raise AssertionError if tests fail.
Class Interface
Methods
setUp(self) -> None
Purpose: Initialize test fixtures before each test method runs, including mock LLM client and sample Australian invoice data
Returns: None - sets up instance attributes self.config, self.mock_llm, and self.au_doc
test_au_extract(self, mock_llm_client) -> None
Purpose: Test complete extraction of data from an Australian invoice including invoice metadata, vendor details, amounts, payment info, and line items
Parameters:
mock_llm_client: Mocked LLMClient class provided by @patch decorator
Returns: None - uses assertions to validate extraction results
test_au_abn_formatting(self, mock_llm_client) -> None
Purpose: Test that Australian Business Numbers (ABN) are properly formatted with spaces in the pattern 'XX XXX XXX XXX'
Parameters:
mock_llm_client: Mocked LLMClient class provided by @patch decorator
Returns: None - asserts that malformatted ABN '12345678901' is converted to '12 345 678 901'
test_au_gst_handling(self, mock_llm_client) -> None
Purpose: Test that Australian Goods and Services Tax (GST) is properly extracted and that both 'gst' and 'tax' fields are populated with the same value
Parameters:
mock_llm_client: Mocked LLMClient class provided by @patch decorator
Returns: None - asserts that GST amount, tax amount, and tax rate are correctly extracted
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
| config | dict | Configuration dictionary containing confidence_threshold and llm settings (provider and model) | instance |
| mock_llm | MockLLMClient | Mock LLM client instance pre-configured with sample responses for invoice metadata, vendor data, amounts, tax data, payment data, and line items | instance |
| au_doc | dict | Sample Australian invoice document structure containing text and pages with invoice details including ABN, GST, and payment information | instance |
Dependencies
unittest, unittest.mock, json, logging, os, pathlib, datetime, extractors.base_extractor, extractors.uk_extractor, extractors.be_extractor, extractors.au_extractor
Required Imports
import unittest
from unittest.mock import patch
from unittest.mock import MagicMock
import json
import os
import logging
from pathlib import Path
import datetime
from extractors.base_extractor import BaseExtractor
from extractors.uk_extractor import UKExtractor
from extractors.be_extractor import BEExtractor
from extractors.au_extractor import AUExtractor
Usage Example
import unittest
from unittest.mock import patch
import json
from extractors.au_extractor import AUExtractor

# NOTE: this snippet assumes TestAUExtractor is defined in (or imported into)
# the module that runs it.

if __name__ == '__main__':
    # Run every test in the class
    suite = unittest.TestLoader().loadTestsFromTestCase(TestAUExtractor)
    unittest.TextTestRunner(verbosity=2).run(suite)

    # Or run an individual test by hand. The @patch decorator on the test
    # method creates the LLMClient mock and passes it in itself, so the
    # method is called WITHOUT arguments; supplying a mock explicitly would
    # hand the method one positional argument too many and raise TypeError.
    test = TestAUExtractor('test_au_extract')
    test.setUp()
    test.test_au_extract()

# Run all tests in the class from the command line:
#   python -m unittest test_module.TestAUExtractor
Best Practices
- Let the unittest runner call setUp() before each test to initialize test fixtures and mock objects; call setUp() yourself only when invoking a test method by hand
- Use the @patch decorator to mock external dependencies like LLMClient to avoid actual API calls
- Each test method should be independent and not rely on state from other tests
- Mock responses should include all required fields to prevent KeyError exceptions
- Test both successful extraction and edge cases like malformatted data (e.g., ABN without spaces)
- Verify both the presence of expected fields and their correct values
- Check that confidence scores are calculated and returned as floats
- Test Australian-specific formatting rules (ABN with spaces, GST calculations, BSB format)
- Use descriptive test method names that clearly indicate what is being tested
- Include assertions for data types as well as values to ensure type safety
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
-
class TestAUValidator 87.1% similar
-
class AUExtractor 83.1% similar
-
class TestUKExtractor 79.3% similar
-
class AUValidator 76.1% similar
-
class TestBEExtractor 74.2% similar