class InputProcessor
A class that handles extraction and encoding of images from various input formats including PDFs (single or multi-page) and standard image files, converting them to base64-encoded strings with metadata.
/tf/active/vicechatdev/e-ink-llm/input_processor.py
13 - 151
moderate
Purpose
InputProcessor provides a unified interface for processing different image input formats. It can extract images from PDF files (either first page only or all pages), encode standard image files to base64, and return comprehensive metadata about the processed content. The class supports multi-page PDF processing with text extraction and content analysis, making it suitable for document processing pipelines that need to convert visual content for API consumption or further processing.
Source Code
class InputProcessor:
    """Handles extraction and encoding of images from various input formats.

    PDFs are rendered to PNG (first page only, or every page through the
    multi-page processor); other image files are flattened, downscaled and
    re-encoded as JPEG. Every result is returned as base64 text together
    with a metadata dictionary.
    """

    # File extensions (lower-case, with leading dot) accepted by the processor.
    SUPPORTED_EXTENSIONS = frozenset(
        {'.pdf', '.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'}
    )

    def __init__(self, enable_multi_page: bool = True, max_pages: int = 50):
        """
        Initialize input processor

        Args:
            enable_multi_page: Enable multi-page PDF processing
            max_pages: Maximum pages to process in multi-page mode
        """
        self.enable_multi_page = enable_multi_page
        self.max_pages = max_pages
        # The multi-page helper is only constructed when it will be used.
        self.multi_page_processor = (
            MultiPagePDFProcessor(max_pages=max_pages) if enable_multi_page else None
        )

    def extract_image(self, file_path: str) -> Union[Tuple[str, dict], Tuple[List[str], dict]]:
        """
        Extract image(s) from PDF or encode image file to base64

        Args:
            file_path: Path to the input file (str or Path)

        Returns:
            For single page: Tuple of (base64_encoded_image, metadata_dict)
            For multi-page: Tuple of (list_of_base64_images, metadata_dict)

        Raises:
            Exception: If the underlying PDF or image processing fails.
        """
        file_path = Path(file_path)

        # Anything that is not a PDF is treated as a regular image file.
        if file_path.suffix.lower() != '.pdf':
            return self._encode_image(file_path)

        if self.enable_multi_page:
            return self._extract_multi_page_pdf(file_path)
        return self._extract_from_pdf(file_path)

    @staticmethod
    def _extract_from_pdf(pdf_path: Path, zoom: float = 2.0) -> Tuple[str, dict]:
        """Extract first page of PDF as a base64-encoded PNG image.

        Args:
            pdf_path: Path to the PDF file
            zoom: Scale factor applied on both axes when rendering the page

        Returns:
            Tuple of (base64_encoded_png, metadata_dict)

        Raises:
            Exception: If the PDF cannot be opened or rendered, or has no pages.
        """
        try:
            doc = fitz.open(pdf_path)
            try:
                if len(doc) == 0:
                    raise ValueError("PDF contains no pages")

                # Render page as image (zoomed for e-ink clarity)
                pix = doc[0].get_pixmap(matrix=fitz.Matrix(zoom, zoom))

                # The pixmap already serialises to PNG, so no PIL round trip
                # is needed to obtain the bytes or the dimensions.
                img_b64 = base64.b64encode(pix.tobytes("png")).decode()

                metadata = {
                    'source_type': 'pdf',
                    'source_file': str(pdf_path),
                    'page_count': len(doc),
                    'dimensions': (pix.width, pix.height)
                }
            finally:
                # Release the document even when rendering fails.
                doc.close()
            return img_b64, metadata
        except Exception as e:
            raise Exception(f"Error processing PDF {pdf_path}: {str(e)}") from e

    def _extract_multi_page_pdf(self, pdf_path: Path) -> Tuple[List[str], dict]:
        """Extract all pages from PDF using multi-page processor.

        Args:
            pdf_path: Path to the PDF file

        Returns:
            Tuple of (list_of_base64_images, metadata_dict). The metadata is
            the processor's own metadata extended with 'pages',
            'total_text_length' and 'content_pages'.

        Raises:
            Exception: If the multi-page processor fails (including when it
                was not created because multi-page mode was disabled).
        """
        try:
            pages, metadata = self.multi_page_processor.extract_all_pages(pdf_path)

            # Convert to expected format
            page_images = [page.image_b64 for page in pages]

            # Per-page summary; a page "has content" when its text is not
            # just whitespace.
            page_entries = [
                {
                    'page_number': page.page_number,
                    'text_content': page.text_content,
                    'dimensions': page.dimensions,
                    'has_content': len(page.text_content.strip()) > 0
                }
                for page in pages
            ]

            enhanced_metadata = {
                **metadata,
                'pages': page_entries,
                'total_text_length': sum(len(page.text_content) for page in pages),
                'content_pages': sum(1 for entry in page_entries if entry['has_content'])
            }
            return page_images, enhanced_metadata
        except Exception as e:
            raise Exception(f"Error processing multi-page PDF {pdf_path}: {str(e)}") from e

    @staticmethod
    def _encode_image(image_path: Path, max_size: int = 2048, quality: int = 85) -> Tuple[str, dict]:
        """Encode image file to a base64 JPEG.

        Args:
            image_path: Path to the image file
            max_size: Largest allowed width/height in pixels; bigger images
                are shrunk proportionally
            quality: JPEG quality setting passed to the encoder

        Returns:
            Tuple of (base64_encoded_jpeg, metadata_dict)

        Raises:
            Exception: If the image cannot be opened, converted or encoded.
        """
        try:
            with PILImage.open(image_path) as opened:
                img = opened

                # Palette images may carry transparency; expand them so the
                # alpha channel can be used as a paste mask below.
                if img.mode == 'P':
                    img = img.convert('RGBA')

                if img.mode in ('RGBA', 'LA'):
                    # Flatten transparency onto a white background
                    background = PILImage.new('RGB', img.size, (255, 255, 255))
                    background.paste(img, mask=img.split()[-1])
                    img = background
                elif img.mode not in ('RGB', 'L'):
                    # Remaining modes (e.g. CMYK, I, F, 1) are normalised to
                    # RGB so the JPEG encoder always receives a mode it can
                    # write.
                    img = img.convert('RGB')

                # Resize if too large (optimize for API limits)
                if img.width > max_size or img.height > max_size:
                    img.thumbnail((max_size, max_size), PILImage.Resampling.LANCZOS)

                # Convert to base64
                buffer = io.BytesIO()
                img.save(buffer, format='JPEG', quality=quality)
                img_b64 = base64.b64encode(buffer.getvalue()).decode()

                metadata = {
                    'source_type': 'image',
                    'source_file': str(image_path),
                    'original_format': image_path.suffix.lower(),
                    'dimensions': img.size
                }
                return img_b64, metadata
        except Exception as e:
            raise Exception(f"Error processing image {image_path}: {str(e)}") from e

    @staticmethod
    def is_supported_file(file_path: Path) -> bool:
        """Check if file type is supported.

        Only the file extension is inspected (case-insensitively); the file
        is not opened.

        Args:
            file_path: Path (or path string) to check

        Returns:
            True when the extension is in SUPPORTED_EXTENSIONS.
        """
        return Path(file_path).suffix.lower() in InputProcessor.SUPPORTED_EXTENSIONS
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
bases |
- | - |
Parameter Details
enable_multi_page: Boolean flag that determines whether to process all pages of a PDF (True) or only the first page (False). When enabled, uses MultiPagePDFProcessor for comprehensive page extraction. Default is True.
max_pages: Integer specifying the maximum number of pages to process from a multi-page PDF. This limit prevents excessive processing time and memory usage for very large documents. Default is 50 pages.
Return Value
Instantiation returns an InputProcessor object. The main extract_image method returns different types based on input: For single-page mode or image files, returns a tuple of (base64_string, metadata_dict). For multi-page PDFs, returns a tuple of (list_of_base64_strings, enhanced_metadata_dict). Metadata includes source_type, source_file, dimensions, and for multi-page PDFs, detailed per-page information including text content and content analysis.
Class Interface
Methods
__init__(self, enable_multi_page: bool = True, max_pages: int = 50)
Purpose: Initialize the InputProcessor with configuration for PDF processing mode
Parameters:
enable_multi_page: Boolean to enable multi-page PDF processing (default: True)
max_pages: Maximum number of pages to process in multi-page mode (default: 50)
Returns: None - initializes the instance
extract_image(self, file_path: str) -> Union[Tuple[str, dict], Tuple[List[str], dict]]
Purpose: Main method to extract and encode images from PDF or image files to base64 format
Parameters:
file_path: String path to the input file (PDF or image format)
Returns: For single page/image: Tuple of (base64_encoded_image, metadata_dict). For multi-page PDF: Tuple of (list_of_base64_images, enhanced_metadata_dict)
_extract_from_pdf(pdf_path: Path) -> Tuple[str, dict]
static
Purpose: Extract the first page of a PDF as a base64-encoded image with metadata
Parameters:
pdf_path: Path object pointing to the PDF file
Returns: Tuple of (base64_encoded_image, metadata_dict) containing source info, page count, and dimensions
_extract_multi_page_pdf(self, pdf_path: Path) -> Tuple[List[str], dict]
Purpose: Extract all pages from a PDF using the multi-page processor with text extraction
Parameters:
pdf_path: Path object pointing to the PDF file
Returns: Tuple of (list_of_base64_images, enhanced_metadata_dict) with per-page text content and analysis
_encode_image(image_path: Path) -> Tuple[str, dict]
static
Purpose: Encode a standard image file to base64 with automatic format conversion and resizing
Parameters:
image_path: Path object pointing to the image file
Returns: Tuple of (base64_encoded_image, metadata_dict) with source info, format, and dimensions
is_supported_file(file_path: Path) -> bool
static
Purpose: Check if a file type is supported by the processor based on file extension
Parameters:
file_path: Path object to check for supported extension
Returns: Boolean indicating whether the file type is supported (PDF or common image formats)
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
enable_multi_page |
bool | Flag indicating whether multi-page PDF processing is enabled | instance |
max_pages |
int | Maximum number of pages to process from multi-page PDFs | instance |
multi_page_processor |
Optional[MultiPagePDFProcessor] | Instance of MultiPagePDFProcessor for handling multi-page PDFs, or None if multi-page processing is disabled | instance |
Dependencies
base64, asyncio, json, pathlib, PIL, fitz, io, typing, os, multi_page_processor
Required Imports
import base64
import asyncio
import json
from pathlib import Path
from PIL import Image as PILImage
import fitz
import io
from typing import Optional, Tuple, List, Union
import os
from multi_page_processor import MultiPagePDFProcessor, PageAnalysis
Conditional/Optional Imports
These imports are only needed under specific conditions:
from multi_page_processor import MultiPagePDFProcessor, PageAnalysis
Condition: required when enable_multi_page=True for multi-page PDF processing
Required (conditional)
Usage Example
# Basic usage with single-page PDF processing
from pathlib import Path
from input_processor import InputProcessor
# Initialize processor for single-page mode
processor = InputProcessor(enable_multi_page=False)
# Process a PDF (first page only)
image_b64, metadata = processor.extract_image('document.pdf')
print(f"Processed {metadata['source_type']}: {metadata['dimensions']}")
# Process an image file
image_b64, metadata = processor.extract_image('photo.jpg')
print(f"Image dimensions: {metadata['dimensions']}")
# Initialize for multi-page processing
multi_processor = InputProcessor(enable_multi_page=True, max_pages=10)
# Process multi-page PDF
page_images, metadata = multi_processor.extract_image('report.pdf')
print(f"Processed {len(page_images)} pages")
for page_info in metadata['pages']:
print(f"Page {page_info['page_number']}: {page_info['has_content']}")
# Check if file is supported before processing
file_path = Path('document.pdf')
if InputProcessor.is_supported_file(file_path):
result = processor.extract_image(str(file_path))
Best Practices
- Always check if a file is supported using is_supported_file() before calling extract_image() to avoid exceptions
- Choose enable_multi_page=False for simple use cases where only the first page is needed to improve performance
- Set an appropriate max_pages limit based on your memory constraints and processing requirements
- The class automatically handles image resizing to 2048px maximum dimension to optimize for API limits
- Images with transparency (RGBA, LA, P modes) are automatically converted to RGB with white background
- PDF pages are rendered at 2x zoom (a 2.0 scale factor on both axes, i.e. twice the default render resolution) for better quality suitable for e-ink displays
- Handle exceptions from extract_image() as file processing can fail due to corrupted files or unsupported formats
- For multi-page PDFs, the returned metadata includes text_content per page which can be used for content analysis
- The class is stateless after initialization - you can reuse the same instance for multiple files
- Base64 strings returned are ready for direct use in API calls or data URIs
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
-
class DocumentProcessor_v3 68.5% similar
-
class DocumentProcessor 67.8% similar
-
class MultiPagePDFProcessor 67.1% similar
-
class DocumentProcessor_v2 62.1% similar
-
class DocumentProcessor_v1 61.6% similar