TestAUExtractor - Code Extractor

class TestAUExtractor

Maturity: 52

Unit test class for testing the AUExtractor class, which extracts data from Australian invoices including ABN, GST, and payment details.

File:
/tf/active/vicechatdev/invoice_extraction/tests/test_extractors.py

Lines:
477 - 650

Complexity:
moderate

Purpose

This test class validates the functionality of the AUExtractor class by testing Australian invoice data extraction, ABN formatting, and GST handling. It uses mocked LLM clients to simulate invoice processing without requiring actual API calls. The tests verify that Australian-specific fields like ABN (Australian Business Number), GST (Goods and Services Tax), BSB (Bank State Branch), and currency formatting are correctly extracted and formatted.

Source Code

class TestAUExtractor(unittest.TestCase):
    """Checks AUExtractor output for Australian invoices using a mocked LLM client."""

    def setUp(self):
        """Prepare the extractor config, the default mock LLM and a sample invoice."""
        llm_settings = {'provider': 'test', 'model': 'test-model'}
        self.config = {'confidence_threshold': 0.7, 'llm': llm_settings}

        # Canned LLM answers. Each one is serialised to a JSON string and
        # registered with MockLLMClient under the key used further down.
        invoice_meta = json.dumps({
            "number": "INV-2023-789",
            "issue_date": "15/01/2023",
            "due_date": "15/02/2023",
            "po_number": "PO-4567",
            "reference": "REF-8901",
        })
        vendor = json.dumps({
            "name": "Australian Test Pty Ltd",
            "abn": "12 345 678 901",
            "address": "123 Sydney Road, Sydney NSW 2000, Australia",
            "contact": "contact@autestvendor.com.au",
        })
        amounts = json.dumps({"subtotal": 500.00, "total": 550.00, "currency": "AUD"})
        tax = json.dumps({"gst": 50.00, "tax_rate": 10, "tax_status": "GST inclusive"})
        payment = json.dumps({
            "bank_name": "Commonwealth Bank",
            "bsb": "062-000",
            "account_number": "12345678",
            "account_name": "Australian Test Pty Ltd",
            "payment_terms": "30 days",
            "reference": "INV-2023-789",
        })
        first_item = {
            "description": "Test Product 1",
            "quantity": 2,
            "unit_price": 100.00,
            "gst_amount": 20.00,
            "amount": 220.00,
            "gst_applicable": True,
        }
        second_item = {
            "description": "Test Product 2",
            "quantity": 3,
            "unit_price": 100.00,
            "gst_amount": 30.00,
            "amount": 330.00,
            "gst_applicable": True,
        }
        line_items = json.dumps([first_item, second_item])

        self.mock_llm = MockLLMClient({
            'invoice metadata': invoice_meta,
            'vendor data': vendor,
            'amounts': amounts,
            'tax data': tax,
            'payment data': payment,
            'line items': line_items,
        })

        # Two-page sample invoice: page one holds the header/vendor block,
        # page two the totals. The document-level text is both pages joined
        # by a single newline.
        header_text = (
            'TAX INVOICE #INV-2023-789\nDate: 15/01/2023\nAustralian Test Pty Ltd\n'
            'ABN: 12 345 678 901\n123 Sydney Road, Sydney NSW 2000, Australia'
        )
        totals_text = 'Subtotal: $500.00\nGST (10%): $50.00\nTotal: $550.00'
        self.au_doc = {
            'text': '\n'.join([header_text, totals_text]),
            'pages': [
                {'text': page_text, 'width': 800, 'height': 1000, 'tables': []}
                for page_text in (header_text, totals_text)
            ],
        }

    def _extract_with(self, llm_class_mock, llm_instance):
        """Make the patched LLMClient return ``llm_instance``, then extract the sample invoice."""
        llm_class_mock.return_value = llm_instance
        extractor = AUExtractor(self.config)
        return extractor.extract(self.au_doc, 'en')

    @patch('extractors.au_extractor.LLMClient')
    def test_au_extract(self, mock_llm_client):
        """Full extraction returns every section with the expected AU values."""
        result = self._extract_with(mock_llm_client, self.mock_llm)

        # Every top-level section must be present
        for section in ('invoice', 'vendor', 'amounts', 'payment', 'line_items'):
            self.assertIn(section, result)

        # Spot-check one or two fields per section
        self.assertEqual(result['invoice']['number'], 'INV-2023-789')
        self.assertEqual(result['vendor']['abn'], '12 345 678 901')
        extracted_amounts = result['amounts']
        self.assertEqual(extracted_amounts['total'], 550.00)
        self.assertEqual(extracted_amounts['gst'], 50.00)
        self.assertEqual(result['payment']['bsb'], '062-000')
        self.assertEqual(len(result['line_items']), 2)

        # A float confidence score must accompany the result
        self.assertIn('confidence', result)
        self.assertIsInstance(result['confidence'], float)

    @patch('extractors.au_extractor.LLMClient')
    def test_au_abn_formatting(self, mock_llm_client):
        """An ABN returned without spaces comes back as '12 345 678 901'."""
        vendor = json.dumps({
            "name": "Australian Test Pty Ltd",
            "abn": "12345678901",  # 11 digits, no grouping spaces
            "address": "123 Sydney Road, Sydney NSW 2000, Australia",
        })
        # The remaining responses are minimal placeholders for the other prompts
        unspaced_abn_llm = MockLLMClient({
            'vendor data': vendor,
            'invoice metadata': json.dumps({"number": "12345", "issue_date": "15/01/2023"}),
            'amounts': json.dumps({"subtotal": 100, "total": 110, "gst": 10, "currency": "AUD"}),
            'tax data': json.dumps({"gst": 10, "tax_rate": 10}),
            'line items': json.dumps([]),
        })

        result = self._extract_with(mock_llm_client, unspaced_abn_llm)

        self.assertEqual(result['vendor']['abn'], '12 345 678 901')

    @patch('extractors.au_extractor.LLMClient')
    def test_au_gst_handling(self, mock_llm_client):
        """GST from the tax response appears under 'gst', 'tax' and 'tax_rate' in amounts."""
        tax = json.dumps({"gst": 10.00, "tax_rate": 10, "tax_status": "GST inclusive"})
        # The remaining responses are minimal placeholders for the other prompts
        gst_llm = MockLLMClient({
            'tax data': tax,
            'invoice metadata': json.dumps({"number": "12345", "issue_date": "15/01/2023"}),
            'vendor data': json.dumps({"name": "Australian Test", "abn": "12 345 678 901"}),
            'amounts': json.dumps({"subtotal": 100, "total": 110, "currency": "AUD"}),
            'line items': json.dumps([]),
        })

        result = self._extract_with(mock_llm_client, gst_llm)

        extracted_amounts = result['amounts']
        self.assertEqual(extracted_amounts['gst'], 10.00)
        self.assertEqual(extracted_amounts['tax'], 10.00)
        self.assertEqual(extracted_amounts['tax_rate'], 10)

Parameters

Name	Type	Default	Kind
`bases`	unittest.TestCase	-

Parameter Details

bases: Inherits from unittest.TestCase to provide testing framework functionality including assertions, test setup/teardown, and test discovery

Return Value

As a test class, it doesn't return values directly. Test methods use assertions to validate behavior. The setUp method initializes test fixtures that are used across test methods. Test methods return None but raise AssertionError if tests fail.

Class Interface

Methods

`setUp(self) -> None`

Purpose: Initialize test fixtures before each test method runs, including mock LLM client and sample Australian invoice data

Returns: None - sets up instance attributes self.config, self.mock_llm, and self.au_doc

`test_au_extract(self, mock_llm_client) -> None`

Purpose: Test complete extraction of data from an Australian invoice including invoice metadata, vendor details, amounts, payment info, and line items

Parameters:

mock_llm_client: Mocked LLMClient class provided by @patch decorator

Returns: None - uses assertions to validate extraction results

`test_au_abn_formatting(self, mock_llm_client) -> None`

Purpose: Test that Australian Business Numbers (ABN) are properly formatted with spaces in the pattern 'XX XXX XXX XXX'

Parameters:

mock_llm_client: Mocked LLMClient class provided by @patch decorator

Returns: None - asserts that malformatted ABN '12345678901' is converted to '12 345 678 901'

`test_au_gst_handling(self, mock_llm_client) -> None`

Purpose: Test that Australian Goods and Services Tax (GST) is properly extracted and that both 'gst' and 'tax' fields are populated with the same value

Parameters:

mock_llm_client: Mocked LLMClient class provided by @patch decorator

Returns: None - asserts that GST amount, tax amount, and tax rate are correctly extracted

Attributes

Name	Type	Description	Scope
`config`	dict	Configuration dictionary containing confidence_threshold and llm settings (provider and model)	instance
`mock_llm`	MockLLMClient	Mock LLM client instance pre-configured with sample responses for invoice metadata, vendor data, amounts, tax data, payment data, and line items	instance
`au_doc`	dict	Sample Australian invoice document structure containing text and pages with invoice details including ABN, GST, and payment information	instance

Dependencies

unittest
unittest.mock
json
logging
os
pathlib
datetime
extractors.base_extractor
extractors.uk_extractor
extractors.be_extractor
extractors.au_extractor

Required Imports

import unittest
from unittest.mock import patch
from unittest.mock import MagicMock
import json
import os
import logging
from pathlib import Path
import datetime
from extractors.base_extractor import BaseExtractor
from extractors.uk_extractor import UKExtractor
from extractors.be_extractor import BEExtractor
from extractors.au_extractor import AUExtractor

Usage Example

import unittest
from unittest.mock import patch
import json
from extractors.au_extractor import AUExtractor

# NOTE: TestAUExtractor must be in scope, e.g. because this snippet lives in
# the test module that defines it.

if __name__ == '__main__':
    # Run every test in the class
    suite = unittest.TestLoader().loadTestsFromTestCase(TestAUExtractor)
    unittest.TextTestRunner(verbosity=2).run(suite)

    # Or run a single test by name. The runner calls setUp() for us, and the
    # @patch decorator on the test method injects the LLMClient mock itself,
    # so no mock argument is passed by hand (doing so raises TypeError).
    single_test = TestAUExtractor('test_au_extract')
    unittest.TextTestRunner(verbosity=2).run(single_test)

# Run all tests in the class from a shell (this is not Python code):
#   python -m unittest test_module.TestAUExtractor

Best Practices

setUp() is run automatically by the unittest runner before every test method; call it by hand only when invoking a test method outside a runner
Use the @patch decorator to mock external dependencies like LLMClient to avoid actual API calls
Each test method should be independent and not rely on state from other tests
Mock responses should include all required fields to prevent KeyError exceptions
Test both successful extraction and edge cases such as unformatted input (e.g., an ABN supplied without spaces)
Verify both the presence of expected fields and their correct values
Check that confidence scores are calculated and returned as floats
Test Australian-specific formatting rules (ABN with spaces, GST calculations, BSB format)
Use descriptive test method names that clearly indicate what is being tested
Include assertions for data types as well as values to ensure type safety

Similar Components

AI-powered semantic similarity - components with related functionality:

class TestAUValidator 87.1% similar

Unit test class for validating the AUValidator class, which validates Australian invoice extraction results including ABN, GST, banking details, and tax invoice requirements.
From: /tf/active/vicechatdev/invoice_extraction/tests/test_validators.py
class AUExtractor 83.1% similar

Australia-specific invoice data extractor that uses LLM (Large Language Model) to extract structured invoice data from Australian tax invoices, handling ABN, ACN, GST, BSB numbers and Australian date formats.
From: /tf/active/vicechatdev/invoice_extraction/extractors/au_extractor.py
class TestUKExtractor 79.3% similar

Unit test class for testing the UKExtractor class, which extracts structured data from UK invoices including VAT numbers, dates, amounts, and line items.
From: /tf/active/vicechatdev/invoice_extraction/tests/test_extractors.py
class AUValidator 76.1% similar

Australia-specific invoice data validator that extends BaseValidator to implement validation rules for Australian invoices including ABN validation, GST calculations, and Australian tax invoice requirements.
From: /tf/active/vicechatdev/invoice_extraction/validators/au_validator.py
class TestBEExtractor 74.2% similar

Unit test class for testing the BEExtractor class, which extracts structured data from Belgian invoices using LLM-based extraction.
From: /tf/active/vicechatdev/invoice_extraction/tests/test_extractors.py

← Back to Browse

Assistant

Hi! I can help improve this code. Tell me what you'd like to enhance (e.g., "add error handling", "optimize performance", "improve readability", "add type hints").

Code Comparison

Original Code

                            class TestAUExtractor(unittest.TestCase):
                                """Checks AUExtractor output for Australian invoices using a mocked LLM client."""

                                def setUp(self):
                                    """Prepare the extractor config, the default mock LLM and a sample invoice."""
                                    llm_settings = {'provider': 'test', 'model': 'test-model'}
                                    self.config = {'confidence_threshold': 0.7, 'llm': llm_settings}

                                    # Canned LLM answers. Each one is serialised to a JSON string and
                                    # registered with MockLLMClient under the key used further down.
                                    invoice_meta = json.dumps({
                                        "number": "INV-2023-789",
                                        "issue_date": "15/01/2023",
                                        "due_date": "15/02/2023",
                                        "po_number": "PO-4567",
                                        "reference": "REF-8901",
                                    })
                                    vendor = json.dumps({
                                        "name": "Australian Test Pty Ltd",
                                        "abn": "12 345 678 901",
                                        "address": "123 Sydney Road, Sydney NSW 2000, Australia",
                                        "contact": "contact@autestvendor.com.au",
                                    })
                                    amounts = json.dumps({"subtotal": 500.00, "total": 550.00, "currency": "AUD"})
                                    tax = json.dumps({"gst": 50.00, "tax_rate": 10, "tax_status": "GST inclusive"})
                                    payment = json.dumps({
                                        "bank_name": "Commonwealth Bank",
                                        "bsb": "062-000",
                                        "account_number": "12345678",
                                        "account_name": "Australian Test Pty Ltd",
                                        "payment_terms": "30 days",
                                        "reference": "INV-2023-789",
                                    })
                                    first_item = {
                                        "description": "Test Product 1",
                                        "quantity": 2,
                                        "unit_price": 100.00,
                                        "gst_amount": 20.00,
                                        "amount": 220.00,
                                        "gst_applicable": True,
                                    }
                                    second_item = {
                                        "description": "Test Product 2",
                                        "quantity": 3,
                                        "unit_price": 100.00,
                                        "gst_amount": 30.00,
                                        "amount": 330.00,
                                        "gst_applicable": True,
                                    }
                                    line_items = json.dumps([first_item, second_item])

                                    self.mock_llm = MockLLMClient({
                                        'invoice metadata': invoice_meta,
                                        'vendor data': vendor,
                                        'amounts': amounts,
                                        'tax data': tax,
                                        'payment data': payment,
                                        'line items': line_items,
                                    })

                                    # Two-page sample invoice: page one holds the header/vendor block,
                                    # page two the totals. The document-level text is both pages joined
                                    # by a single newline.
                                    header_text = (
                                        'TAX INVOICE #INV-2023-789\nDate: 15/01/2023\nAustralian Test Pty Ltd\n'
                                        'ABN: 12 345 678 901\n123 Sydney Road, Sydney NSW 2000, Australia'
                                    )
                                    totals_text = 'Subtotal: $500.00\nGST (10%): $50.00\nTotal: $550.00'
                                    self.au_doc = {
                                        'text': '\n'.join([header_text, totals_text]),
                                        'pages': [
                                            {'text': page_text, 'width': 800, 'height': 1000, 'tables': []}
                                            for page_text in (header_text, totals_text)
                                        ],
                                    }

                                def _extract_with(self, llm_class_mock, llm_instance):
                                    """Make the patched LLMClient return ``llm_instance``, then extract the sample invoice."""
                                    llm_class_mock.return_value = llm_instance
                                    extractor = AUExtractor(self.config)
                                    return extractor.extract(self.au_doc, 'en')

                                @patch('extractors.au_extractor.LLMClient')
                                def test_au_extract(self, mock_llm_client):
                                    """Full extraction returns every section with the expected AU values."""
                                    result = self._extract_with(mock_llm_client, self.mock_llm)

                                    # Every top-level section must be present
                                    for section in ('invoice', 'vendor', 'amounts', 'payment', 'line_items'):
                                        self.assertIn(section, result)

                                    # Spot-check one or two fields per section
                                    self.assertEqual(result['invoice']['number'], 'INV-2023-789')
                                    self.assertEqual(result['vendor']['abn'], '12 345 678 901')
                                    extracted_amounts = result['amounts']
                                    self.assertEqual(extracted_amounts['total'], 550.00)
                                    self.assertEqual(extracted_amounts['gst'], 50.00)
                                    self.assertEqual(result['payment']['bsb'], '062-000')
                                    self.assertEqual(len(result['line_items']), 2)

                                    # A float confidence score must accompany the result
                                    self.assertIn('confidence', result)
                                    self.assertIsInstance(result['confidence'], float)

                                @patch('extractors.au_extractor.LLMClient')
                                def test_au_abn_formatting(self, mock_llm_client):
                                    """An ABN returned without spaces comes back as '12 345 678 901'."""
                                    vendor = json.dumps({
                                        "name": "Australian Test Pty Ltd",
                                        "abn": "12345678901",  # 11 digits, no grouping spaces
                                        "address": "123 Sydney Road, Sydney NSW 2000, Australia",
                                    })
                                    # The remaining responses are minimal placeholders for the other prompts
                                    unspaced_abn_llm = MockLLMClient({
                                        'vendor data': vendor,
                                        'invoice metadata': json.dumps({"number": "12345", "issue_date": "15/01/2023"}),
                                        'amounts': json.dumps({"subtotal": 100, "total": 110, "gst": 10, "currency": "AUD"}),
                                        'tax data': json.dumps({"gst": 10, "tax_rate": 10}),
                                        'line items': json.dumps([]),
                                    })

                                    result = self._extract_with(mock_llm_client, unspaced_abn_llm)

                                    self.assertEqual(result['vendor']['abn'], '12 345 678 901')

                                @patch('extractors.au_extractor.LLMClient')
                                def test_au_gst_handling(self, mock_llm_client):
                                    """GST from the tax response appears under 'gst', 'tax' and 'tax_rate' in amounts."""
                                    tax = json.dumps({"gst": 10.00, "tax_rate": 10, "tax_status": "GST inclusive"})
                                    # The remaining responses are minimal placeholders for the other prompts
                                    gst_llm = MockLLMClient({
                                        'tax data': tax,
                                        'invoice metadata': json.dumps({"number": "12345", "issue_date": "15/01/2023"}),
                                        'vendor data': json.dumps({"name": "Australian Test", "abn": "12 345 678 901"}),
                                        'amounts': json.dumps({"subtotal": 100, "total": 110, "currency": "AUD"}),
                                        'line items': json.dumps([]),
                                    })

                                    result = self._extract_with(mock_llm_client, gst_llm)

                                    extracted_amounts = result['amounts']
                                    self.assertEqual(extracted_amounts['gst'], 10.00)
                                    self.assertEqual(extracted_amounts['tax'], 10.00)
                                    self.assertEqual(extracted_amounts['tax_rate'], 10)
                        

Improved Code

🔍 Code Extractor

class TestAUExtractor

Purpose

Source Code

Parameters

Parameter Details

Return Value

Class Interface

Methods

`setUp(self) -> None`

`test_au_extract(self, mock_llm_client) -> None`

`test_au_abn_formatting(self, mock_llm_client) -> None`

`test_au_gst_handling(self, mock_llm_client) -> None`

Attributes

Dependencies

Required Imports

Usage Example

Best Practices

Tags

Similar Components

class TestAUValidator 87.1% similar

class AUExtractor 83.1% similar

class TestUKExtractor 79.3% similar

class AUValidator 76.1% similar

class TestBEExtractor 74.2% similar

class TestAUExtractor

Purpose

Source Code

Parameters

Parameter Details

Return Value

Class Interface

Methods

setUp(self) -> None

test_au_extract(self, mock_llm_client) -> None

test_au_abn_formatting(self, mock_llm_client) -> None

test_au_gst_handling(self, mock_llm_client) -> None

Attributes

Dependencies

Required Imports

Usage Example

Best Practices

Tags

Similar Components

class TestAUValidator 87.1% similar

class AUExtractor 83.1% similar

class TestUKExtractor 79.3% similar

class AUValidator 76.1% similar

class TestBEExtractor 74.2% similar

✨ Improve Code: TestAUExtractor

Code Comparison

`setUp(self) -> None`

`test_au_extract(self, mock_llm_client) -> None`

`test_au_abn_formatting(self, mock_llm_client) -> None`

`test_au_gst_handling(self, mock_llm_client) -> None`