class DryRunUploadComparison
A diagnostic class that compares a custom PDF upload implementation against real reMarkable app behavior by analyzing captured network logs without making actual API calls.
/tf/active/vicechatdev/e-ink-llm/cloudtest/dry_run_comparison.py
18 - 426
complex
Purpose
This class performs dry-run testing and validation of PDF upload implementations for the reMarkable tablet ecosystem. It loads real app network logs from CSV files, simulates the custom upload process, and performs detailed comparisons of HTTP requests, headers, and content to identify discrepancies. The primary use case is debugging and validating custom upload implementations before deploying them, ensuring they match the official app's behavior to avoid authentication or compatibility issues.
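The sync protocol the class mirrors is content-addressed: each file is PUT to a URL derived from the SHA-256 of its bytes. A minimal sketch of that derivation (host and path are taken from the source below; the payload is illustrative):

import hashlib

payload = b'{"visibleName": "TestDocument"}'  # illustrative component bytes
file_hash = hashlib.sha256(payload).hexdigest()
upload_url = f"https://eu.tectonic.remarkable.com/sync/v3/files/{file_hash}"
print(upload_url)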
Source Code
class DryRunUploadComparison:
    """Compare upload implementation against real app without making API calls"""

    def __init__(self):
        self.base_dir = Path(__file__).parent

        # Load auth for token format analysis
        from auth import RemarkableAuth
        auth = RemarkableAuth()
        self.auth_session = auth.get_authenticated_session()

        # Load real app logs for comparison
        self.real_app_logs = self.load_real_app_logs()

        print("🔍 Dry Run Upload Comparison Initialized")
        print(f"📊 Real app logs loaded: {len(self.real_app_logs)} entries")
    def load_real_app_logs(self) -> List[Dict]:
        """Load the real app logs from CSV and request files"""
        try:
            # Load the CSV log
            csv_file = self.base_dir.parent / "app_out_bis" / "newstart.csv"
            raw_folder = self.base_dir.parent / "app_out_bis" / "Raw_newstart.folder"
            if not csv_file.exists() or not raw_folder.exists():
                print("❌ Real app logs not found")
                return []

            # Parse CSV to get request sequence
            logs = []
            with open(csv_file, 'r') as f:
                lines = f.readlines()[1:]  # Skip header
            for line in lines:
                parts = line.strip().split(',')
                # Columns 19 and 20 are read below, so require at least 21 fields
                # (a check of >= 8 would raise IndexError on short rows)
                if len(parts) >= 21:
                    log_entry = {
                        'id': parts[0],
                        'url': parts[1],
                        'method': parts[6],
                        'status_code': parts[5],
                        'request_body_size': parts[19],
                        'response_body_size': parts[20]
                    }
                    logs.append(log_entry)

            # Load actual request content for key requests
            for log in logs:
                if log['method'] == 'PUT' and 'sync/v3/files' in log['url']:
                    # Try to find corresponding request file
                    request_file = raw_folder / f"[{log['id']}] Request - {log['url'].replace('https://', '').replace('/', '_')}.txt"
                    if request_file.exists():
                        with open(request_file, 'r', encoding='utf-8', errors='ignore') as f:
                            log['request_content'] = f.read()

            print(f"✅ Loaded {len(logs)} real app log entries")
            return logs
        except Exception as e:
            print(f"❌ Failed to load real app logs: {e}")
            return []
    def simulate_pdf_upload(self, pdf_name: str = "TestDocument") -> List[Dict]:
        """Simulate our PDF upload implementation and return proposed requests"""
        print(f"\n🧪 Simulating PDF upload: '{pdf_name}'")

        # Simulate document creation process
        doc_uuid = str(uuid.uuid4())

        # Create test PDF content
        test_pdf_content = b'%PDF-1.4\n1 0 obj\n<<\n/Type /Catalog\n/Pages 2 0 R\n>>\nendobj\n2 0 obj\n<<\n/Type /Pages\n/Kids [3 0 R]\n/Count 1\n>>\nendobj\n3 0 obj\n<<\n/Type /Page\n/Parent 2 0 R\n/MediaBox [0 0 612 792]\n>>\nendobj\nxref\n0 4\n0000000000 65535 f \n0000000010 00000 n \n0000000079 00000 n \n0000000173 00000 n \ntrailer\n<<\n/Size 4\n/Root 1 0 R\n>>\nstartxref\n301\n%%EOF'

        # Simulate component creation
        components = self.create_document_components(doc_uuid, pdf_name, test_pdf_content)

        # Generate proposed requests
        proposed_requests = []

        # 1. Metadata upload
        metadata_hash = self.calculate_hash(components['metadata'])
        proposed_requests.append({
            'step': 'metadata_upload',
            'method': 'PUT',
            'url': f'https://eu.tectonic.remarkable.com/sync/v3/files/{metadata_hash}',
            'headers': self.get_our_headers('metadata', doc_uuid, len(components['metadata'])),
            'body': components['metadata'],
            'body_size': len(components['metadata'])
        })

        # 2. Content upload
        content_hash = self.calculate_hash(components['content'])
        proposed_requests.append({
            'step': 'content_upload',
            'method': 'PUT',
            'url': f'https://eu.tectonic.remarkable.com/sync/v3/files/{content_hash}',
            'headers': self.get_our_headers('content', doc_uuid, len(components['content'])),
            'body': components['content'],
            'body_size': len(components['content'])
        })

        # 3. PDF upload
        pdf_hash = self.calculate_hash(test_pdf_content)
        proposed_requests.append({
            'step': 'pdf_upload',
            'method': 'PUT',
            'url': f'https://eu.tectonic.remarkable.com/sync/v3/files/{pdf_hash}',
            'headers': self.get_our_headers('pdf', doc_uuid, len(test_pdf_content)),
            'body': test_pdf_content,
            'body_size': len(test_pdf_content)
        })

        # 4. Pagedata upload
        pagedata_hash = self.calculate_hash(components['pagedata'])
        proposed_requests.append({
            'step': 'pagedata_upload',
            'method': 'PUT',
            'url': f'https://eu.tectonic.remarkable.com/sync/v3/files/{pagedata_hash}',
            'headers': self.get_our_headers('pagedata', doc_uuid, len(components['pagedata'])),
            'body': components['pagedata'],
            'body_size': len(components['pagedata'])
        })

        # 5. DocSchema upload
        docschema_content = self.create_docschema(components, pdf_hash, doc_uuid)
        docschema_hash = self.calculate_hash(docschema_content)
        proposed_requests.append({
            'step': 'docschema_upload',
            'method': 'PUT',
            'url': f'https://eu.tectonic.remarkable.com/sync/v3/files/{docschema_hash}',
            'headers': self.get_our_headers('docschema', doc_uuid, len(docschema_content)),
            'body': docschema_content,
            'body_size': len(docschema_content)
        })

        # 6. Root update (simulated)
        proposed_requests.append({
            'step': 'root_update',
            'method': 'PUT',
            'url': 'https://eu.tectonic.remarkable.com/sync/v3/root',
            'headers': self.get_our_headers('root', doc_uuid, 200),  # Estimated size
            'body': 'ROOT_UPDATE_CONTENT',
            'body_size': 200
        })

        return proposed_requests
    def create_document_components(self, doc_uuid: str, pdf_name: str, pdf_content: bytes) -> Dict:
        """Create all document components as our implementation would"""
        # Metadata
        metadata = {
            "createdTime": str(int(time.time() * 1000)),
            "lastModified": str(int(time.time() * 1000)),
            "lastOpened": "0",  # ✅ FIXED: Always "0" like real app
            "lastOpenedPage": 0,
            "metadatamodified": False,
            "modified": False,
            "parent": "",
            "pinned": False,
            "source": "com.remarkable.macos",  # ✅ FIXED: Changed from windows to macos
            "type": "DocumentType",
            "visibleName": pdf_name,
            "version": 1
        }

        # Content
        content = {
            "coverPageNumber": 0,
            "customZoomCenterX": 0,
            "customZoomCenterY": 936,
            "customZoomOrientation": "portrait",
            "customZoomPageHeight": 1872,
            "customZoomPageWidth": 1404,
            "customZoomScale": 1,
            "documentMetadata": {},
            "extraMetadata": {},
            "fileType": "pdf",
            "fontName": "",
            "formatVersion": 1,
            "lineHeight": -1,
            "orientation": "portrait",
            "originalPageCount": 1,
            "pageCount": 1,
            "pageTags": [],
            "pages": [str(uuid.uuid4())],
            "redirectionPageMap": [0],
            "sizeInBytes": str(len(pdf_content)),
            "tags": [],
            "textAlignment": "justify",
            "textScale": 1,
            "zoomMode": "bestFit"
        }

        # Pagedata (our current implementation)
        pagedata = "\n"  # ✅ FIXED: Changed from empty string to newline like real app

        return {
            'metadata': json.dumps(metadata).encode('utf-8'),
            'content': json.dumps(content).encode('utf-8'),
            'pagedata': pagedata.encode('utf-8')
        }
    def create_docschema(self, components: Dict, pdf_hash: str, doc_uuid: str) -> bytes:
        """Create docSchema as our implementation would"""
        metadata_hash = self.calculate_hash(components['metadata'])
        content_hash = self.calculate_hash(components['content'])
        pagedata_hash = self.calculate_hash(components['pagedata'])
        lines = [
            "3",  # Version
            f"{metadata_hash}:80000000:{doc_uuid}.metadata:0:{len(components['metadata'])}",
            f"{content_hash}:80000000:{doc_uuid}.content:0:{len(components['content'])}",
            f"{pdf_hash}:80000000:{doc_uuid}.pdf:0:{len(b'PDF_CONTENT')}",  # Placeholder
            f"{pagedata_hash}:80000000:{doc_uuid}.pagedata:0:{len(components['pagedata'])}"
        ]
        return '\n'.join(lines).encode('utf-8')
    def get_our_headers(self, file_type: str, doc_uuid: str, content_size: int) -> Dict:
        """Generate headers as our implementation would"""
        # Get authorization token from our session
        auth_header = ""
        if self.auth_session and hasattr(self.auth_session, 'headers'):
            auth_header = self.auth_session.headers.get('Authorization', '')

        # Base headers
        headers = {
            'host': 'eu.tectonic.remarkable.com',
            'authorization': auth_header,
            'content-type': 'application/octet-stream',
            'rm-batch-number': '1',
            'rm-sync-id': str(uuid.uuid4()),
            'user-agent': 'desktop/3.20.0.922 (macos 15.4)',  # ✅ FIXED: Matches real app exactly
            'content-length': str(content_size),
            'connection': 'Keep-Alive',
            'accept-encoding': 'gzip, deflate',
            'accept-language': 'en-US,*'
        }

        # File-specific headers
        if file_type == 'metadata':
            headers['rm-filename'] = f'{doc_uuid}.metadata'
        elif file_type == 'content':
            headers['rm-filename'] = f'{doc_uuid}.content'
        elif file_type == 'pdf':
            headers['rm-filename'] = f'{doc_uuid}.pdf'
        elif file_type == 'pagedata':
            headers['rm-filename'] = f'{doc_uuid}.pagedata'
        elif file_type == 'docschema':
            headers['rm-filename'] = f'{doc_uuid}'

        # Calculate CRC32C hash (simplified for dry run)
        headers['x-goog-hash'] = f'crc32c={base64.b64encode(b"dummy_hash").decode()}'
        return headers
    def calculate_hash(self, content: bytes) -> str:
        """Calculate SHA256 hash"""
        return hashlib.sha256(content).hexdigest()
    def compare_with_real_app(self, our_requests: List[Dict]) -> Dict:
        """Compare our proposed requests with real app logs"""
        print(f"\n🔍 Comparing Implementation vs Real App")
        print("=" * 60)

        # Filter real app logs for file uploads
        real_uploads = [log for log in self.real_app_logs
                        if log['method'] == 'PUT' and 'sync/v3/files' in log.get('url', '')]
        print(f"📊 Our implementation: {len(our_requests)} requests")
        print(f"📊 Real app: {len(real_uploads)} uploads")

        differences = {
            'header_differences': [],
            'sequence_differences': [],
            'content_differences': [],
            'critical_issues': []
        }

        # Compare headers for each type
        for our_req in our_requests:
            print(f"\n🔍 Analyzing {our_req['step']}:")

            # Find corresponding real app request
            real_req = None
            for real in real_uploads:
                if 'request_content' in real and our_req['step'] in ['metadata_upload', 'pdf_upload']:
                    real_req = real
                    break

            if real_req and 'request_content' in real_req:
                self.compare_headers(our_req, real_req, differences)

            print(f"   📋 Our body size: {our_req['body_size']} bytes")
            print(f"   📋 Our URL: {our_req['url']}")
            print(f"   📋 Our headers preview:")
            for key, value in our_req['headers'].items():
                if key in ['user-agent', 'authorization', 'rm-filename', 'x-goog-hash']:
                    print(f"      {key}: {value[:50]}{'...' if len(str(value)) > 50 else ''}")

        # Analyze critical differences
        self.analyze_critical_differences(differences)
        return differences
    def compare_headers(self, our_req: Dict, real_req: Dict, differences: Dict):
        """Compare headers between our implementation and real app"""
        try:
            # Parse real app request headers
            real_content = real_req['request_content']
            real_headers = {}
            lines = real_content.split('\n')
            for line in lines[1:]:  # Skip first line (PUT ...)
                if ':' in line and not line.startswith('PUT') and not line.startswith('<Data'):
                    key, value = line.split(':', 1)
                    real_headers[key.strip().lower()] = value.strip()
                elif line.startswith('<Data'):
                    break

            # Compare key headers
            our_headers = {k.lower(): v for k, v in our_req['headers'].items()}
            critical_headers = ['user-agent', 'authorization', 'content-type', 'rm-filename', 'x-goog-hash']
            for header in critical_headers:
                our_value = our_headers.get(header, 'MISSING')
                real_value = real_headers.get(header, 'MISSING')
                if our_value != real_value:
                    diff = {
                        'step': our_req['step'],
                        'header': header,
                        'our_value': our_value,
                        'real_value': real_value,
                        'critical': header in ['user-agent', 'authorization']
                    }
                    differences['header_differences'].append(diff)
                    print(f"   ⚠️ Header difference - {header}:")
                    print(f"      Our:  {our_value[:50]}{'...' if len(str(our_value)) > 50 else ''}")
                    print(f"      Real: {real_value[:50]}{'...' if len(str(real_value)) > 50 else ''}")
        except Exception as e:
            print(f"   ❌ Header comparison failed: {e}")
    def analyze_critical_differences(self, differences: Dict):
        """Analyze and highlight critical differences"""
        print(f"\n🚨 CRITICAL DIFFERENCES ANALYSIS")
        print("=" * 60)

        # Group differences by criticality
        critical_issues = []
        for diff in differences['header_differences']:
            if diff['critical'] or diff['header'] in ['user-agent', 'authorization', 'source']:
                critical_issues.append(diff)

        if critical_issues:
            print(f"❌ Found {len(critical_issues)} critical issues:")
            for issue in critical_issues:
                print(f"   🔴 {issue['step']} - {issue['header']}")
                print(f"      Problem: {issue['our_value'][:30]} vs {issue['real_value'][:30]}")
        else:
            print("✅ No critical header differences found")

        # Check for user-agent mismatch
        ua_issues = [d for d in differences['header_differences'] if d['header'] == 'user-agent']
        if ua_issues:
            print(f"\n🔴 USER-AGENT MISMATCH (CRITICAL):")
            for ua in ua_issues:
                print(f"   Our:  {ua['our_value']}")
                print(f"   Real: {ua['real_value']}")
                print(f"   Impact: Version/platform detection issues")

        # Check for authorization differences
        auth_issues = [d for d in differences['header_differences'] if d['header'] == 'authorization']
        if auth_issues:
            print(f"\n🔴 AUTHORIZATION DIFFERENCES (CRITICAL):")
            print(f"   This could cause authentication/device recognition issues")

        differences['critical_issues'] = critical_issues
    def generate_fix_recommendations(self, differences: Dict) -> List[str]:
        """Generate specific recommendations to fix differences"""
        recommendations = []

        # User-Agent fixes
        ua_issues = [d for d in differences['header_differences'] if d['header'] == 'user-agent']
        if ua_issues:
            real_ua = ua_issues[0]['real_value']
            recommendations.append(f"UPDATE: Change user-agent to: {real_ua}")

        # Authorization fixes
        auth_issues = [d for d in differences['header_differences'] if d['header'] == 'authorization']
        if auth_issues:
            recommendations.append("UPDATE: Fix JWT token to match macOS device description")

        # Content-Type fixes
        ct_issues = [d for d in differences['header_differences'] if d['header'] == 'content-type']
        if ct_issues:
            recommendations.append("UPDATE: Standardize content-type to application/octet-stream")

        # Source field fix (from metadata analysis)
        recommendations.append("UPDATE: Change metadata source from 'com.remarkable.windows' to 'com.remarkable.macos'")
        recommendations.append("UPDATE: Change pagedata from empty string to '\\n' character")
        recommendations.append("UPDATE: Set lastOpened to '0' consistently")

        return recommendations
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| bases | - | - | - |
Parameter Details
__init__: No parameters required. The constructor automatically initializes the comparison environment by loading authentication credentials, parsing real app logs from the file system, and setting up the base directory for file operations.
Return Value
Instantiation returns a DryRunUploadComparison object with loaded real app logs and authenticated session. Key methods return: simulate_pdf_upload() returns List[Dict] of proposed HTTP requests; compare_with_real_app() returns Dict containing header_differences, sequence_differences, content_differences, and critical_issues; generate_fix_recommendations() returns List[str] of actionable fix suggestions.
Class Interface
Methods
__init__(self)
Purpose: Initialize the comparison tool by loading authentication, real app logs, and setting up the base directory
Returns: None. Sets up instance attributes: base_dir, auth_session, real_app_logs
load_real_app_logs(self) -> List[Dict]
Purpose: Load and parse real app network logs from CSV and raw request files
Returns: List of dictionaries containing log entries with id, url, method, status_code, request_body_size, response_body_size, and optionally request_content
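For reference, a single parsed entry has roughly this shape (all values illustrative, not taken from a real capture):

log_entry = {
    'id': '42',
    'url': 'https://eu.tectonic.remarkable.com/sync/v3/files/ab12...',
    'method': 'PUT',
    'status_code': '200',
    'request_body_size': '1024',
    'response_body_size': '0',
    # only present for PUT sync/v3/files requests with a matching raw request file:
    'request_content': 'PUT /sync/v3/files/ab12... HTTP/1.1\n...'
}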
simulate_pdf_upload(self, pdf_name: str = 'TestDocument') -> List[Dict]
Purpose: Simulate the complete PDF upload process and generate all HTTP requests that would be made
Parameters:
pdf_name: Name of the test document to simulate uploading (default: 'TestDocument')
Returns: List of dictionaries representing HTTP requests with step, method, url, headers, body, and body_size for metadata, content, PDF, pagedata, docschema, and root update
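Each element shares one shape; the metadata step, for example, looks like this (hash placeholder and sizes illustrative):

{
    'step': 'metadata_upload',
    'method': 'PUT',
    'url': 'https://eu.tectonic.remarkable.com/sync/v3/files/<sha256-of-body>',
    'headers': {...},  # produced by get_our_headers()
    'body': b'{"createdTime": "...", ...}',
    'body_size': 214
}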
create_document_components(self, doc_uuid: str, pdf_name: str, pdf_content: bytes) -> Dict
Purpose: Create all document components (metadata, content, pagedata) as the implementation would generate them
Parameters:
doc_uuid: UUID string for the document
pdf_name: Visible name for the document
pdf_content: Raw PDF file content as bytes
Returns: Dictionary with keys 'metadata', 'content', 'pagedata' containing JSON-encoded bytes for each component
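The returned values are upload-ready bytes; a quick round-trip sanity check (a sketch assuming an instantiated comparison object; the UUID and PDF bytes are placeholders):

import json, uuid

doc_uuid = str(uuid.uuid4())
components = comparison.create_document_components(doc_uuid, "MyDoc", b'%PDF-1.4 ...')
metadata = json.loads(components['metadata'])
assert metadata['visibleName'] == "MyDoc"
assert components['pagedata'] == b'\n'  # single newline, matching the real app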
create_docschema(self, components: Dict, pdf_hash: str, doc_uuid: str) -> bytes
Purpose: Create the docSchema file that references all document components with their hashes and sizes
Parameters:
components: Dictionary containing the metadata, content, and pagedata components
pdf_hash: SHA256 hash of the PDF content
doc_uuid: UUID string for the document
Returns: Encoded bytes of the docSchema file in reMarkable's format with version and component references
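The file is plain text: a version line followed by one hash:80000000:filename:0:size line per component, e.g. (hashes truncated, sizes illustrative except for the pdf entry, whose size is the 11-byte placeholder from the source):

3
9f2c...:80000000:3f1b9c4e-....metadata:0:312
41d8...:80000000:3f1b9c4e-....content:0:687
7a0e...:80000000:3f1b9c4e-....pdf:0:11
01ba4719...:80000000:3f1b9c4e-....pagedata:0:1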
get_our_headers(self, file_type: str, doc_uuid: str, content_size: int) -> Dict
Purpose: Generate HTTP headers for file uploads matching the implementation's format
Parameters:
file_type: Type of file being uploaded: 'metadata', 'content', 'pdf', 'pagedata', 'docschema', or 'root'
doc_uuid: UUID string for the document
content_size: Size of the content in bytes for the Content-Length header
Returns: Dictionary of HTTP headers including authorization, content-type, rm-filename, user-agent, and x-goog-hash
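Note that the source stubs x-goog-hash with a dummy value. If a real checksum were needed, a sketch following the usual Google Cloud Storage convention (base64 of the big-endian 4-byte CRC32C), computed here with the third-party crc32c package, which this class does not itself use:

import base64
import struct

import crc32c  # third-party: pip install crc32c (an assumption, not a dependency of this class)

def goog_crc32c_header(content: bytes) -> str:
    checksum = crc32c.crc32c(content)  # unsigned 32-bit CRC32C of the payload
    encoded = base64.b64encode(struct.pack('>I', checksum)).decode()
    return f'crc32c={encoded}'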
calculate_hash(self, content: bytes) -> str
Purpose: Calculate SHA256 hash of content for file identification in sync protocol
Parameters:
content: Bytes content to hash
Returns: Hexadecimal string representation of SHA256 hash
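Because the pagedata component is a single newline, its hash is stable across runs:

>>> hashlib.sha256(b'\n').hexdigest()
'01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b'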
compare_with_real_app(self, our_requests: List[Dict]) -> Dict
Purpose: Compare proposed requests against real app logs and identify differences
Parameters:
our_requests: List of request dictionaries from simulate_pdf_upload()
Returns: Dictionary with keys: header_differences, sequence_differences, content_differences, critical_issues containing detailed comparison results
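The returned structure, shown with one illustrative entry (values are examples, not real capture data):

differences = {
    'header_differences': [
        {
            'step': 'metadata_upload',
            'header': 'rm-filename',
            'our_value': '3f1b9c4e-....metadata',
            'real_value': 'MISSING',
            'critical': False
        }
    ],
    'sequence_differences': [],
    'content_differences': [],
    'critical_issues': []  # filled in by analyze_critical_differences()
}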
compare_headers(self, our_req: Dict, real_req: Dict, differences: Dict)
Purpose: Compare HTTP headers between implementation and real app for a specific request
Parameters:
our_req: Dictionary representing our implementation's request
real_req: Dictionary representing the real app's request from logs
differences: Dictionary to append differences to (modified in place)
Returns: None. Modifies the differences dictionary in place by appending to header_differences list
analyze_critical_differences(self, differences: Dict)
Purpose: Analyze differences and highlight critical issues that could cause failures
Parameters:
differences: Dictionary containing all differences from comparison
Returns: None. Prints analysis to console and updates differences['critical_issues'] list
generate_fix_recommendations(self, differences: Dict) -> List[str]
Purpose: Generate actionable recommendations to fix identified differences
Parameters:
differences: Dictionary containing comparison results from compare_with_real_app()
Returns: List of string recommendations describing specific fixes needed (e.g., 'UPDATE: Change user-agent to: ...')
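Because the last three recommendations are unconditional in the source, even a comparison with no header differences yields:

recs = comparison.generate_fix_recommendations({'header_differences': []})
# ["UPDATE: Change metadata source from 'com.remarkable.windows' to 'com.remarkable.macos'",
#  "UPDATE: Change pagedata from empty string to '\\n' character",
#  "UPDATE: Set lastOpened to '0' consistently"]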
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
| base_dir | Path | Path object pointing to the directory containing the script, used for locating log files | instance |
| auth_session | requests.Session or similar | Authenticated session object from RemarkableAuth containing authorization headers and tokens | instance |
| real_app_logs | List[Dict] | List of parsed log entries from real app network captures, each containing request details like url, method, headers, and content | instance |
Dependencies
json, time, pathlib, typing, uuid, hashlib, base64, binascii
Required Imports
import json
import time
from pathlib import Path
from typing import Dict, Any, List
import uuid
import hashlib
import base64
import binascii
Conditional/Optional Imports
These imports are only needed under specific conditions:
from auth import RemarkableAuth
Condition: Required (conditional). Needed for authentication token generation and session management; auth.py must be available in the same directory or on the Python path.
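A defensive import pattern for callers (a sketch; the class itself imports RemarkableAuth unconditionally inside __init__):

try:
    from auth import RemarkableAuth
except ImportError as exc:
    raise RuntimeError(
        "auth.py providing RemarkableAuth must be importable "
        "(e.g. alongside dry_run_comparison.py)"
    ) from exc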
Usage Example
# Initialize the comparison tool
comparison = DryRunUploadComparison()

# Simulate a PDF upload with a custom document name
proposed_requests = comparison.simulate_pdf_upload(pdf_name="MyTestDocument")

# Compare against real app behavior
differences = comparison.compare_with_real_app(proposed_requests)

# Generate fix recommendations
recommendations = comparison.generate_fix_recommendations(differences)

# Review recommendations
for rec in recommendations:
    print(f"Fix needed: {rec}")

# Access specific difference categories
if differences['critical_issues']:
    print(f"Found {len(differences['critical_issues'])} critical issues")
    for issue in differences['critical_issues']:
        print(f"Issue in {issue['step']}: {issue['header']}")
Best Practices
- Instantiate the class only after ensuring real app logs are available in the expected directory structure
- Call simulate_pdf_upload() before compare_with_real_app() to generate the request sequence to compare
- Always review critical_issues in the differences dictionary as these indicate authentication or compatibility problems
- Use generate_fix_recommendations() to get actionable fixes rather than manually parsing differences
- The class performs file I/O during initialization, so handle potential IOError/OSError exceptions (see the sketch after this list)
- Real app logs must contain request content for meaningful header comparisons
- The class maintains state through instance attributes (real_app_logs, auth_session), so create new instances for independent test runs
- Hash calculations use SHA256 for file content identification matching reMarkable's sync protocol
- User-agent and authorization header mismatches are flagged as critical and should be fixed first
- The class does not make actual API calls, making it safe for testing without affecting cloud state
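A sketch of the defensive initialization mentioned above (hypothetical wrapper code, not part of the class):

try:
    comparison = DryRunUploadComparison()
except (IOError, OSError) as exc:
    # load_real_app_logs() swallows its own errors; failures here most likely
    # come from the auth/session setup or an unreadable base directory
    print(f"Initialization failed: {exc}")
    raise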
Similar Components
AI-powered semantic similarity - components with related functionality:
- function main_v77 (76.3% similar)
- function main_v15 (72.1% similar)
- class FixedUploadTest (71.5% similar)
- class SimplePDFUploadTest (71.0% similar)
- function test_quick_upload (68.1% similar)