class DocumentComparator
A class that compares reMarkable cloud documents to analyze and identify structural differences between them, particularly useful for debugging document upload issues.
/tf/active/vicechatdev/e-ink-llm/cloudtest/compare_documents.py
13 - 235
complex
Purpose
DocumentComparator provides comprehensive analysis and comparison of reMarkable cloud documents by examining their root.docSchema entries, document schemas, components (metadata, content, pagedata), and structural properties. It's designed to help developers understand differences between documents created through the official app versus custom uploads, enabling debugging of document structure issues.
Source Code
class DocumentComparator:
    """Compare reMarkable cloud documents to find structural differences.

    Every lookup goes through the authenticated session created in
    ``__init__``. Index files (the root index and each document schema) are
    plain text: the first line is a version header and every following line
    is a colon-separated record whose first five fields are read here as
    ``hash:flags:name:type:size``.
    """

    # Sync API endpoints used by every request issued by this class.
    ROOT_URL = "https://eu.tectonic.remarkable.com/sync/v4/root"
    FILES_URL = "https://eu.tectonic.remarkable.com/sync/v3/files"

    # Seconds to wait on connect/read before giving up, so that a stalled
    # connection raises instead of hanging the analysis forever.
    REQUEST_TIMEOUT = 30

    # Documents compared when compare_documents() is called without arguments.
    DEFAULT_INVOICE_UUID = "cf2a3833-4a8f-4004-ab8d-8dc3c5f561bc"  # Poulpharm invoice
    DEFAULT_UPLOAD_UUID = "206f5df3-07c2-4341-8afd-2b7362aefa91"

    def __init__(self):
        """Authenticate and store the session used for all later requests.

        Raises:
            RuntimeError: if no authenticated session could be obtained.
        """
        auth = RemarkableAuth()
        self.session = auth.get_authenticated_session()
        if not self.session:
            raise RuntimeError("Failed to authenticate with reMarkable")
        print("š Document Comparator Initialized")

    def _fetch_file(self, file_hash: str) -> str:
        """Download the blob stored under ``file_hash`` and return its text.

        Raises whatever ``raise_for_status()`` raises on an HTTP error status.
        """
        response = self.session.get(
            f"{self.FILES_URL}/{file_hash}", timeout=self.REQUEST_TIMEOUT
        )
        response.raise_for_status()
        return response.text

    def get_root_info(self):
        """Get current root.docSchema info.

        Returns:
            Tuple ``(root_data, root_content)``: the parsed JSON returned by
            the root endpoint, and the text of the file its ``'hash'`` entry
            points to.
        """
        root_response = self.session.get(self.ROOT_URL, timeout=self.REQUEST_TIMEOUT)
        root_response.raise_for_status()
        root_data = root_response.json()

        # The root endpoint only names the index; its content is a separate file.
        root_content = self._fetch_file(root_data['hash'])
        return root_data, root_content

    def analyze_document(self, doc_uuid: str, doc_name: str):
        """Analyze a single document structure and print a report.

        Args:
            doc_uuid: UUID to look for in the root index (matched as a
                substring of each index line).
            doc_name: Human-readable label used only in the printed output.

        Returns:
            ``None`` when no usable root entry contains ``doc_uuid``;
            otherwise a dict with keys ``'entry'`` (parsed root record),
            ``'schema_lines'`` (lines of the document schema, header
            included), ``'components'`` (records keyed by file extension) and
            ``'metadata'`` (parsed metadata JSON, or ``None`` when the
            document has no metadata component).
        """
        print(f"\nš Analyzing {doc_name} ({doc_uuid[:8]}...)")
        print("=" * 50)

        _, root_content = self.get_root_info()

        # Find the document's record in the root index.
        doc_entry = None
        for line in root_content.strip().split('\n')[1:]:  # Skip version header
            if doc_uuid not in line:
                continue
            parts = line.split(':')
            if len(parts) >= 5:
                doc_entry = {
                    'hash': parts[0],
                    'flags': parts[1],
                    'uuid': parts[2],
                    'type': parts[3],
                    'size': parts[4],
                    'full_line': line,
                }
                break

        if not doc_entry:
            print("ā Document not found in root.docSchema")
            return None

        print("š ROOT.DOCSCHEMA ENTRY:")
        print(f" Hash: {doc_entry['hash']}")
        print(f" Flags: {doc_entry['flags']}")
        print(f" UUID: {doc_entry['uuid']}")
        print(f" Type: {doc_entry['type']}")
        print(f" Size: {doc_entry['size']}")
        print(f" Full line: {doc_entry['full_line']}")

        # The root record's hash points at the document's own schema file.
        doc_lines = self._fetch_file(doc_entry['hash']).strip().split('\n')

        print("\nš DOCUMENT SCHEMA:")
        print(f" Version: {doc_lines[0]}")
        print(f" Components: {len(doc_lines) - 1}")

        # Components are keyed by file extension, so when several files share
        # an extension only the last one listed is kept.
        components = {}
        for i, line in enumerate(doc_lines[1:], 1):
            parts = line.split(':')
            if len(parts) >= 5:
                component_type = parts[2].split('.')[-1]  # Get extension
                components[component_type] = {
                    'hash': parts[0],
                    'flags': parts[1],
                    'filename': parts[2],
                    'type': parts[3],
                    'size': parts[4],
                    'line': line,
                }
                print(f" Component {i}: {component_type} ({parts[4]} bytes)")

        # Analyze metadata
        metadata = None
        if 'metadata' in components:
            metadata = json.loads(self._fetch_file(components['metadata']['hash']))
            print("\nš METADATA:")
            for key, value in sorted(metadata.items()):
                print(f" {key}: {value}")

        # Analyze content if exists
        if 'content' in components:
            content_data = self._fetch_file(components['content']['hash'])
            print("\nš CONTENT:")
            print(f" Size: {len(content_data)} bytes")
            print(f" Type: {type(content_data)}")
            if len(content_data) < 200:
                print(f" Data: {content_data}")
            else:
                print(f" Preview: {content_data[:100]}...")

        # Analyze pagedata if exists
        if 'pagedata' in components:
            pagedata_data = self._fetch_file(components['pagedata']['hash'])
            print("\nš PAGEDATA:")
            print(f" Size: {len(pagedata_data)} bytes")
            if len(pagedata_data) < 200:
                print(f" Data: {pagedata_data}")
            else:
                print(f" Preview: {pagedata_data[:100]}...")

        return {
            'entry': doc_entry,
            'schema_lines': doc_lines,
            'components': components,
            'metadata': metadata,
        }

    def compare_documents(
        self,
        invoice_uuid: str = DEFAULT_INVOICE_UUID,
        upload_uuid: str = DEFAULT_UPLOAD_UUID,
        invoice_name: str = "Poulpharm Invoice (Real App)",
        upload_name: str = "Our Upload",
    ):
        """Compare a reference document against an uploaded one.

        Called without arguments this compares the same two documents, with
        the same labels, as before the parameters existed.

        Args:
            invoice_uuid: UUID of the reference ("real app") document.
            upload_uuid: UUID of the document to check against it.
            invoice_name: Label for the reference document in the report.
            upload_name: Label for the uploaded document in the report.

        Returns:
            Tuple ``(invoice_data, upload_data)`` of ``analyze_document``
            results, or ``None`` when either document could not be analyzed.
        """
        print("š Comparing Real App Document vs Our Upload")
        print("=" * 60)

        invoice_data = self.analyze_document(invoice_uuid, invoice_name)
        upload_data = self.analyze_document(upload_uuid, upload_name)

        if not invoice_data or not upload_data:
            print("ā Could not analyze both documents")
            return

        # Compare key differences
        print("\nš KEY DIFFERENCES:")
        print("=" * 30)

        # Compare root entry flags
        invoice_flags = invoice_data['entry']['flags']
        upload_flags = upload_data['entry']['flags']
        if invoice_flags != upload_flags:
            print("š ROOT FLAGS DIFFER:")
            print(f" Invoice: {invoice_flags}")
            print(f" Upload: {upload_flags}")

        # Compare document types
        invoice_type = invoice_data['entry']['type']
        upload_type = upload_data['entry']['type']
        if invoice_type != upload_type:
            print("š DOCUMENT TYPE DIFFERS:")
            print(f" Invoice: {invoice_type}")
            print(f" Upload: {upload_type}")

        # Compare components
        invoice_components = set(invoice_data['components'].keys())
        upload_components = set(upload_data['components'].keys())
        missing_in_upload = invoice_components - upload_components
        extra_in_upload = upload_components - invoice_components
        if missing_in_upload:
            print(f"š COMPONENTS MISSING IN UPLOAD: {missing_in_upload}")
        if extra_in_upload:
            print(f"š EXTRA COMPONENTS IN UPLOAD: {extra_in_upload}")

        # Compare metadata keys (only when both documents have metadata)
        invoice_meta = invoice_data['metadata']
        upload_meta = upload_data['metadata']
        if invoice_meta and upload_meta:
            invoice_keys = set(invoice_meta.keys())
            upload_keys = set(upload_meta.keys())
            missing_keys = invoice_keys - upload_keys
            extra_keys = upload_keys - invoice_keys
            if missing_keys:
                print(f"š METADATA KEYS MISSING IN UPLOAD: {missing_keys}")
            if extra_keys:
                print(f"š EXTRA METADATA KEYS IN UPLOAD: {extra_keys}")

            # Compare specific metadata values
            for key in invoice_keys & upload_keys:
                if invoice_meta[key] != upload_meta[key]:
                    print(f"š METADATA DIFFERS for '{key}':")
                    print(f" Invoice: {invoice_meta[key]}")
                    print(f" Upload: {upload_meta[key]}")

        # Compare component flags and types
        for component in invoice_components & upload_components:
            invoice_comp = invoice_data['components'][component]
            upload_comp = upload_data['components'][component]
            if invoice_comp['flags'] != upload_comp['flags']:
                print(f"š COMPONENT FLAGS DIFFER for {component}:")
                print(f" Invoice: {invoice_comp['flags']}")
                print(f" Upload: {upload_comp['flags']}")
            if invoice_comp['type'] != upload_comp['type']:
                print(f"š COMPONENT TYPE DIFFERS for {component}:")
                print(f" Invoice: {invoice_comp['type']}")
                print(f" Upload: {upload_comp['type']}")

        print("\nš” ANALYSIS COMPLETE")
        return invoice_data, upload_data
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
bases |
- | - |
Parameter Details
__init__: No parameters required. The constructor automatically initializes authentication with reMarkable cloud services using RemarkableAuth and establishes an authenticated session. Raises RuntimeError if authentication fails.
Return Value
Instantiation returns a DocumentComparator object with an authenticated session. The analyze_document method returns a dictionary containing document entry data, schema lines, components dictionary, and metadata (or None if analysis fails). The compare_documents method returns a tuple of (invoice_data, upload_data) dictionaries with full analysis results for both documents.
Class Interface
Methods
__init__(self)
Purpose: Initialize the DocumentComparator with an authenticated reMarkable session
Returns: None - initializes the instance with self.session attribute
get_root_info(self) -> tuple
Purpose: Retrieve the current root.docSchema information from reMarkable cloud
Returns: Tuple of (root_data: dict, root_content: str) where root_data contains the root document metadata and root_content is the raw schema content
analyze_document(self, doc_uuid: str, doc_name: str) -> dict | None
Purpose: Perform comprehensive analysis of a single document's structure, components, and metadata
Parameters:
doc_uuid: The UUID of the document to analyze (full UUID string)
doc_name: Human-readable name for the document (used in console output)
Returns: Dictionary with keys 'entry' (root entry data), 'schema_lines' (document schema lines), 'components' (dict of component types to their data), and 'metadata' (parsed metadata or None). Returns None if document not found.
compare_documents(self) -> tuple | None
Purpose: Compare two hardcoded documents (Poulpharm invoice vs uploaded document) to identify structural differences
Returns: Tuple of (invoice_data: dict, upload_data: dict) containing full analysis results for both documents, or None if either document cannot be analyzed
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
| session | requests.Session | Authenticated HTTP session object for making API requests to reMarkable cloud services. Initialized in __init__ via RemarkableAuth.get_authenticated_session() | instance |
Dependencies
json, requests, auth
Required Imports
import json
from auth import RemarkableAuth
Usage Example
# Initialize the comparator (authenticates against the reMarkable cloud)
comparator = DocumentComparator()

# Analyze a single document; the result is None when the UUID is not found
doc_uuid = 'cf2a3833-4a8f-4004-ab8d-8dc3c5f561bc'
result = comparator.analyze_document(doc_uuid, 'My Document')
if result:
    print(f"Components: {result['components'].keys()}")
    print(f"Metadata: {result['metadata']}")

# Compare two documents (uses hardcoded UUIDs).
# compare_documents() returns None when either document cannot be analyzed,
# so check the result before unpacking it into two names.
comparison = comparator.compare_documents()
if comparison:
    invoice_data, upload_data = comparison

# Get current root info
root_data, root_content = comparator.get_root_info()
Best Practices
- Always instantiate within a try-except block to handle authentication failures gracefully
- The class maintains an authenticated session as instance state - reuse the same instance for multiple operations to avoid re-authentication
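- analyze_document() calls get_root_info() itself on every invocation, so each analysis already reads the current root index; call get_root_info() directly only when you need the raw root data or root content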
- The compare_documents() method uses hardcoded UUIDs - modify these in the source or extend the method to accept parameters
- Network errors may occur during API calls - implement retry logic or error handling around method calls
- Large documents may take time to download and analyze - consider adding timeouts for production use
- The session object is stateful and may expire - implement session refresh logic for long-running processes
- Document UUIDs must exist in the reMarkable cloud; non-existent UUIDs will return None from analyze_document
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
-
function main_v113 71.6% similar
-
class RealAppUploadAnalyzer 70.8% similar
-
function main_v94 67.5% similar
-
class FolderDebugger 65.6% similar
-
function verify_document_status 64.3% similar