class DryRunUploadComparison
A diagnostic class that compares a custom PDF upload implementation against real reMarkable app behavior by analyzing captured network logs without making actual API calls.
/tf/active/vicechatdev/e-ink-llm/cloudtest/dry_run_comparison.py
18 - 426
complex
Purpose
This class performs dry-run testing and validation of PDF upload implementations for the reMarkable tablet ecosystem. It loads real app network logs from CSV files, simulates the custom upload process, and performs detailed comparisons of HTTP requests, headers, and content to identify discrepancies. The primary use case is debugging and validating custom upload implementations before deploying them, ensuring they match the official app's behavior to avoid authentication or compatibility issues.
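The sync protocol the class mirrors is content-addressed: each file is PUT to a URL derived from the SHA-256 of its bytes. A minimal sketch of that derivation (host and path are taken from the source below; the payload is illustrative):

import hashlib

payload = b'{"visibleName": "TestDocument"}'  # illustrative component bytes
file_hash = hashlib.sha256(payload).hexdigest()
upload_url = f"https://eu.tectonic.remarkable.com/sync/v3/files/{file_hash}"
print(upload_url)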
Source Code
class DryRunUploadComparison:
    """Compare upload implementation against real app without making API calls"""

    def __init__(self):
        self.base_dir = Path(__file__).parent

        # Load auth for token format analysis
        from auth import RemarkableAuth
        auth = RemarkableAuth()
        self.auth_session = auth.get_authenticated_session()

        # Load real app logs for comparison
        self.real_app_logs = self.load_real_app_logs()

        print("🔍 Dry Run Upload Comparison Initialized")
        print(f"📊 Real app logs loaded: {len(self.real_app_logs)} entries")
    def load_real_app_logs(self) -> List[Dict]:
        """Load the real app logs from CSV and request files"""
        try:
            # Load the CSV log
            csv_file = self.base_dir.parent / "app_out_bis" / "newstart.csv"
            raw_folder = self.base_dir.parent / "app_out_bis" / "Raw_newstart.folder"
            if not csv_file.exists() or not raw_folder.exists():
                print("❌ Real app logs not found")
                return []

            # Parse CSV to get request sequence
            logs = []
            with open(csv_file, 'r') as f:
                lines = f.readlines()[1:]  # Skip header
            for line in lines:
                parts = line.strip().split(',')
                # Columns 19 and 20 are read below, so require at least 21 fields
                # (a check of >= 8 would raise IndexError on short rows)
                if len(parts) >= 21:
                    log_entry = {
                        'id': parts[0],
                        'url': parts[1],
                        'method': parts[6],
                        'status_code': parts[5],
                        'request_body_size': parts[19],
                        'response_body_size': parts[20]
                    }
                    logs.append(log_entry)

            # Load actual request content for key requests
            for log in logs:
                if log['method'] == 'PUT' and 'sync/v3/files' in log['url']:
                    # Try to find corresponding request file
                    request_file = raw_folder / f"[{log['id']}] Request - {log['url'].replace('https://', '').replace('/', '_')}.txt"
                    if request_file.exists():
                        with open(request_file, 'r', encoding='utf-8', errors='ignore') as f:
                            log['request_content'] = f.read()

            print(f"✅ Loaded {len(logs)} real app log entries")
            return logs
        except Exception as e:
            print(f"❌ Failed to load real app logs: {e}")
            return []
    def simulate_pdf_upload(self, pdf_name: str = "TestDocument") -> List[Dict]:
        """Simulate our PDF upload implementation and return proposed requests"""
        print(f"\n🧪 Simulating PDF upload: '{pdf_name}'")

        # Simulate document creation process
        doc_uuid = str(uuid.uuid4())

        # Create test PDF content
        test_pdf_content = b'%PDF-1.4\n1 0 obj\n<<\n/Type /Catalog\n/Pages 2 0 R\n>>\nendobj\n2 0 obj\n<<\n/Type /Pages\n/Kids [3 0 R]\n/Count 1\n>>\nendobj\n3 0 obj\n<<\n/Type /Page\n/Parent 2 0 R\n/MediaBox [0 0 612 792]\n>>\nendobj\nxref\n0 4\n0000000000 65535 f \n0000000010 00000 n \n0000000079 00000 n \n0000000173 00000 n \ntrailer\n<<\n/Size 4\n/Root 1 0 R\n>>\nstartxref\n301\n%%EOF'

        # Simulate component creation
        components = self.create_document_components(doc_uuid, pdf_name, test_pdf_content)

        # Generate proposed requests
        proposed_requests = []

        # 1. Metadata upload
        metadata_hash = self.calculate_hash(components['metadata'])
        proposed_requests.append({
            'step': 'metadata_upload',
            'method': 'PUT',
            'url': f'https://eu.tectonic.remarkable.com/sync/v3/files/{metadata_hash}',
            'headers': self.get_our_headers('metadata', doc_uuid, len(components['metadata'])),
            'body': components['metadata'],
            'body_size': len(components['metadata'])
        })

        # 2. Content upload
        content_hash = self.calculate_hash(components['content'])
        proposed_requests.append({
            'step': 'content_upload',
            'method': 'PUT',
            'url': f'https://eu.tectonic.remarkable.com/sync/v3/files/{content_hash}',
            'headers': self.get_our_headers('content', doc_uuid, len(components['content'])),
            'body': components['content'],
            'body_size': len(components['content'])
        })

        # 3. PDF upload
        pdf_hash = self.calculate_hash(test_pdf_content)
        proposed_requests.append({
            'step': 'pdf_upload',
            'method': 'PUT',
            'url': f'https://eu.tectonic.remarkable.com/sync/v3/files/{pdf_hash}',
            'headers': self.get_our_headers('pdf', doc_uuid, len(test_pdf_content)),
            'body': test_pdf_content,
            'body_size': len(test_pdf_content)
        })

        # 4. Pagedata upload
        pagedata_hash = self.calculate_hash(components['pagedata'])
        proposed_requests.append({
            'step': 'pagedata_upload',
            'method': 'PUT',
            'url': f'https://eu.tectonic.remarkable.com/sync/v3/files/{pagedata_hash}',
            'headers': self.get_our_headers('pagedata', doc_uuid, len(components['pagedata'])),
            'body': components['pagedata'],
            'body_size': len(components['pagedata'])
        })

        # 5. DocSchema upload
        docschema_content = self.create_docschema(components, pdf_hash, doc_uuid)
        docschema_hash = self.calculate_hash(docschema_content)
        proposed_requests.append({
            'step': 'docschema_upload',
            'method': 'PUT',
            'url': f'https://eu.tectonic.remarkable.com/sync/v3/files/{docschema_hash}',
            'headers': self.get_our_headers('docschema', doc_uuid, len(docschema_content)),
            'body': docschema_content,
            'body_size': len(docschema_content)
        })

        # 6. Root update (simulated)
        proposed_requests.append({
            'step': 'root_update',
            'method': 'PUT',
            'url': 'https://eu.tectonic.remarkable.com/sync/v3/root',
            'headers': self.get_our_headers('root', doc_uuid, 200),  # Estimated size
            'body': 'ROOT_UPDATE_CONTENT',
            'body_size': 200
        })

        return proposed_requests
    def create_document_components(self, doc_uuid: str, pdf_name: str, pdf_content: bytes) -> Dict:
        """Create all document components as our implementation would"""
        # Metadata
        metadata = {
            "createdTime": str(int(time.time() * 1000)),
            "lastModified": str(int(time.time() * 1000)),
            "lastOpened": "0",  # ✅ FIXED: Always "0" like real app
            "lastOpenedPage": 0,
            "metadatamodified": False,
            "modified": False,
            "parent": "",
            "pinned": False,
            "source": "com.remarkable.macos",  # ✅ FIXED: Changed from windows to macos
            "type": "DocumentType",
            "visibleName": pdf_name,
            "version": 1
        }

        # Content
        content = {
            "coverPageNumber": 0,
            "customZoomCenterX": 0,
            "customZoomCenterY": 936,
            "customZoomOrientation": "portrait",
            "customZoomPageHeight": 1872,
            "customZoomPageWidth": 1404,
            "customZoomScale": 1,
            "documentMetadata": {},
            "extraMetadata": {},
            "fileType": "pdf",
            "fontName": "",
            "formatVersion": 1,
            "lineHeight": -1,
            "orientation": "portrait",
            "originalPageCount": 1,
            "pageCount": 1,
            "pageTags": [],
            "pages": [str(uuid.uuid4())],
            "redirectionPageMap": [0],
            "sizeInBytes": str(len(pdf_content)),
            "tags": [],
            "textAlignment": "justify",
            "textScale": 1,
            "zoomMode": "bestFit"
        }

        # Pagedata (our current implementation)
        pagedata = "\n"  # ✅ FIXED: Changed from empty string to newline like real app

        return {
            'metadata': json.dumps(metadata).encode('utf-8'),
            'content': json.dumps(content).encode('utf-8'),
            'pagedata': pagedata.encode('utf-8')
        }
    def create_docschema(self, components: Dict, pdf_hash: str, doc_uuid: str) -> bytes:
        """Create docSchema as our implementation would"""
        metadata_hash = self.calculate_hash(components['metadata'])
        content_hash = self.calculate_hash(components['content'])
        pagedata_hash = self.calculate_hash(components['pagedata'])
        lines = [
            "3",  # Version
            f"{metadata_hash}:80000000:{doc_uuid}.metadata:0:{len(components['metadata'])}",
            f"{content_hash}:80000000:{doc_uuid}.content:0:{len(components['content'])}",
            f"{pdf_hash}:80000000:{doc_uuid}.pdf:0:{len(b'PDF_CONTENT')}",  # Placeholder
            f"{pagedata_hash}:80000000:{doc_uuid}.pagedata:0:{len(components['pagedata'])}"
        ]
        return '\n'.join(lines).encode('utf-8')
    def get_our_headers(self, file_type: str, doc_uuid: str, content_size: int) -> Dict:
        """Generate headers as our implementation would"""
        # Get authorization token from our session
        auth_header = ""
        if self.auth_session and hasattr(self.auth_session, 'headers'):
            auth_header = self.auth_session.headers.get('Authorization', '')

        # Base headers
        headers = {
            'host': 'eu.tectonic.remarkable.com',
            'authorization': auth_header,
            'content-type': 'application/octet-stream',
            'rm-batch-number': '1',
            'rm-sync-id': str(uuid.uuid4()),
            'user-agent': 'desktop/3.20.0.922 (macos 15.4)',  # ✅ FIXED: Matches real app exactly
            'content-length': str(content_size),
            'connection': 'Keep-Alive',
            'accept-encoding': 'gzip, deflate',
            'accept-language': 'en-US,*'
        }

        # File-specific headers
        if file_type == 'metadata':
            headers['rm-filename'] = f'{doc_uuid}.metadata'
        elif file_type == 'content':
            headers['rm-filename'] = f'{doc_uuid}.content'
        elif file_type == 'pdf':
            headers['rm-filename'] = f'{doc_uuid}.pdf'
        elif file_type == 'pagedata':
            headers['rm-filename'] = f'{doc_uuid}.pagedata'
        elif file_type == 'docschema':
            headers['rm-filename'] = f'{doc_uuid}'

        # Calculate CRC32C hash (simplified for dry run)
        headers['x-goog-hash'] = f'crc32c={base64.b64encode(b"dummy_hash").decode()}'
        return headers
    def calculate_hash(self, content: bytes) -> str:
        """Calculate SHA256 hash"""
        return hashlib.sha256(content).hexdigest()
    def compare_with_real_app(self, our_requests: List[Dict]) -> Dict:
        """Compare our proposed requests with real app logs"""
        print(f"\n🔍 Comparing Implementation vs Real App")
        print("=" * 60)

        # Filter real app logs for file uploads
        real_uploads = [log for log in self.real_app_logs
                        if log['method'] == 'PUT' and 'sync/v3/files' in log.get('url', '')]
        print(f"📊 Our implementation: {len(our_requests)} requests")
        print(f"📊 Real app: {len(real_uploads)} uploads")

        differences = {
            'header_differences': [],
            'sequence_differences': [],
            'content_differences': [],
            'critical_issues': []
        }

        # Compare headers for each type
        for our_req in our_requests:
            print(f"\n🔍 Analyzing {our_req['step']}:")

            # Find corresponding real app request
            real_req = None
            for real in real_uploads:
                if 'request_content' in real and our_req['step'] in ['metadata_upload', 'pdf_upload']:
                    real_req = real
                    break

            if real_req and 'request_content' in real_req:
                self.compare_headers(our_req, real_req, differences)

            print(f"   📋 Our body size: {our_req['body_size']} bytes")
            print(f"   📋 Our URL: {our_req['url']}")
            print(f"   📋 Our headers preview:")
            for key, value in our_req['headers'].items():
                if key in ['user-agent', 'authorization', 'rm-filename', 'x-goog-hash']:
                    print(f"      {key}: {value[:50]}{'...' if len(str(value)) > 50 else ''}")

        # Analyze critical differences
        self.analyze_critical_differences(differences)
        return differences
    def compare_headers(self, our_req: Dict, real_req: Dict, differences: Dict):
        """Compare headers between our implementation and real app"""
        try:
            # Parse real app request headers
            real_content = real_req['request_content']
            real_headers = {}
            lines = real_content.split('\n')
            for line in lines[1:]:  # Skip first line (PUT ...)
                if ':' in line and not line.startswith('PUT') and not line.startswith('<Data'):
                    key, value = line.split(':', 1)
                    real_headers[key.strip().lower()] = value.strip()
                elif line.startswith('<Data'):
                    break

            # Compare key headers
            our_headers = {k.lower(): v for k, v in our_req['headers'].items()}
            critical_headers = ['user-agent', 'authorization', 'content-type', 'rm-filename', 'x-goog-hash']
            for header in critical_headers:
                our_value = our_headers.get(header, 'MISSING')
                real_value = real_headers.get(header, 'MISSING')
                if our_value != real_value:
                    diff = {
                        'step': our_req['step'],
                        'header': header,
                        'our_value': our_value,
                        'real_value': real_value,
                        'critical': header in ['user-agent', 'authorization']
                    }
                    differences['header_differences'].append(diff)
                    print(f"   ⚠️ Header difference - {header}:")
                    print(f"      Our:  {our_value[:50]}{'...' if len(str(our_value)) > 50 else ''}")
                    print(f"      Real: {real_value[:50]}{'...' if len(str(real_value)) > 50 else ''}")
        except Exception as e:
            print(f"   ❌ Header comparison failed: {e}")
    def analyze_critical_differences(self, differences: Dict):
        """Analyze and highlight critical differences"""
        print(f"\n🚨 CRITICAL DIFFERENCES ANALYSIS")
        print("=" * 60)

        # Group differences by criticality
        critical_issues = []
        for diff in differences['header_differences']:
            if diff['critical'] or diff['header'] in ['user-agent', 'authorization', 'source']:
                critical_issues.append(diff)

        if critical_issues:
            print(f"❌ Found {len(critical_issues)} critical issues:")
            for issue in critical_issues:
                print(f"   🔴 {issue['step']} - {issue['header']}")
                print(f"      Problem: {issue['our_value'][:30]} vs {issue['real_value'][:30]}")
        else:
            print("✅ No critical header differences found")

        # Check for user-agent mismatch
        ua_issues = [d for d in differences['header_differences'] if d['header'] == 'user-agent']
        if ua_issues:
            print(f"\n🔴 USER-AGENT MISMATCH (CRITICAL):")
            for ua in ua_issues:
                print(f"   Our:  {ua['our_value']}")
                print(f"   Real: {ua['real_value']}")
                print(f"   Impact: Version/platform detection issues")

        # Check for authorization differences
        auth_issues = [d for d in differences['header_differences'] if d['header'] == 'authorization']
        if auth_issues:
            print(f"\n🔴 AUTHORIZATION DIFFERENCES (CRITICAL):")
            print(f"   This could cause authentication/device recognition issues")

        differences['critical_issues'] = critical_issues
    def generate_fix_recommendations(self, differences: Dict) -> List[str]:
        """Generate specific recommendations to fix differences"""
        recommendations = []

        # User-Agent fixes
        ua_issues = [d for d in differences['header_differences'] if d['header'] == 'user-agent']
        if ua_issues:
            real_ua = ua_issues[0]['real_value']
            recommendations.append(f"UPDATE: Change user-agent to: {real_ua}")

        # Authorization fixes
        auth_issues = [d for d in differences['header_differences'] if d['header'] == 'authorization']
        if auth_issues:
            recommendations.append("UPDATE: Fix JWT token to match macOS device description")

        # Content-Type fixes
        ct_issues = [d for d in differences['header_differences'] if d['header'] == 'content-type']
        if ct_issues:
            recommendations.append("UPDATE: Standardize content-type to application/octet-stream")

        # Source field fix (from metadata analysis)
        recommendations.append("UPDATE: Change metadata source from 'com.remarkable.windows' to 'com.remarkable.macos'")
        recommendations.append("UPDATE: Change pagedata from empty string to '\\n' character")
        recommendations.append("UPDATE: Set lastOpened to '0' consistently")

        return recommendations
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| bases | - | - | - |
Parameter Details
__init__: No parameters required. The constructor automatically initializes the comparison environment by loading authentication credentials, parsing real app logs from the file system, and setting up the base directory for file operations.
Return Value
Instantiation returns a DryRunUploadComparison object with loaded real app logs and authenticated session. Key methods return: simulate_pdf_upload() returns List[Dict] of proposed HTTP requests; compare_with_real_app() returns Dict containing header_differences, sequence_differences, content_differences, and critical_issues; generate_fix_recommendations() returns List[str] of actionable fix suggestions.
Class Interface
Methods
__init__(self)
Purpose: Initialize the comparison tool by loading authentication, real app logs, and setting up the base directory
Returns: None. Sets up instance attributes: base_dir, auth_session, real_app_logs
load_real_app_logs(self) -> List[Dict]
Purpose: Load and parse real app network logs from CSV and raw request files
Returns: List of dictionaries containing log entries with id, url, method, status_code, request_body_size, response_body_size, and optionally request_content
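For reference, a single parsed entry has roughly this shape (all values illustrative, not taken from a real capture):

log_entry = {
    'id': '42',
    'url': 'https://eu.tectonic.remarkable.com/sync/v3/files/ab12...',
    'method': 'PUT',
    'status_code': '200',
    'request_body_size': '1024',
    'response_body_size': '0',
    # only present for PUT sync/v3/files requests with a matching raw request file:
    'request_content': 'PUT /sync/v3/files/ab12... HTTP/1.1\n...'
}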
simulate_pdf_upload(self, pdf_name: str = 'TestDocument') -> List[Dict]
Purpose: Simulate the complete PDF upload process and generate all HTTP requests that would be made
Parameters:
pdf_name: Name of the test document to simulate uploading (default: 'TestDocument')
Returns: List of dictionaries representing HTTP requests with step, method, url, headers, body, and body_size for metadata, content, PDF, pagedata, docschema, and root update
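Each element shares one shape; the metadata step, for example, looks like this (hash placeholder and sizes illustrative):

{
    'step': 'metadata_upload',
    'method': 'PUT',
    'url': 'https://eu.tectonic.remarkable.com/sync/v3/files/<sha256-of-body>',
    'headers': {...},  # produced by get_our_headers()
    'body': b'{"createdTime": "...", ...}',
    'body_size': 214
}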
create_document_components(self, doc_uuid: str, pdf_name: str, pdf_content: bytes) -> Dict
Purpose: Create all document components (metadata, content, pagedata) as the implementation would generate them
Parameters:
doc_uuid: UUID string for the document
pdf_name: Visible name for the document
pdf_content: Raw PDF file content as bytes
Returns: Dictionary with keys 'metadata', 'content', 'pagedata' containing JSON-encoded bytes for each component
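The returned values are upload-ready bytes; a quick round-trip sanity check (a sketch assuming an instantiated comparison object; the UUID and PDF bytes are placeholders):

import json, uuid

doc_uuid = str(uuid.uuid4())
components = comparison.create_document_components(doc_uuid, "MyDoc", b'%PDF-1.4 ...')
metadata = json.loads(components['metadata'])
assert metadata['visibleName'] == "MyDoc"
assert components['pagedata'] == b'\n'  # single newline, matching the real app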
create_docschema(self, components: Dict, pdf_hash: str, doc_uuid: str) -> bytes
Purpose: Create the docSchema file that references all document components with their hashes and sizes
Parameters:
components: Dictionary containing the metadata, content, and pagedata components
pdf_hash: SHA256 hash of the PDF content
doc_uuid: UUID string for the document
Returns: Encoded bytes of the docSchema file in reMarkable's format with version and component references
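The file is plain text: a version line followed by one hash:80000000:filename:0:size line per component, e.g. (hashes truncated, sizes illustrative except for the pdf entry, whose size is the 11-byte placeholder from the source):

3
9f2c...:80000000:3f1b9c4e-....metadata:0:312
41d8...:80000000:3f1b9c4e-....content:0:687
7a0e...:80000000:3f1b9c4e-....pdf:0:11
01ba4719...:80000000:3f1b9c4e-....pagedata:0:1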
get_our_headers(self, file_type: str, doc_uuid: str, content_size: int) -> Dict
Purpose: Generate HTTP headers for file uploads matching the implementation's format
Parameters:
file_type: Type of file being uploaded: 'metadata', 'content', 'pdf', 'pagedata', 'docschema', or 'root'
doc_uuid: UUID string for the document
content_size: Size of the content in bytes for the Content-Length header
Returns: Dictionary of HTTP headers including authorization, content-type, rm-filename, user-agent, and x-goog-hash
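Note that the source stubs x-goog-hash with a dummy value. If a real checksum were needed, a sketch following the usual Google Cloud Storage convention (base64 of the big-endian 4-byte CRC32C), computed here with the third-party crc32c package, which this class does not itself use:

import base64
import struct

import crc32c  # third-party: pip install crc32c (an assumption, not a dependency of this class)

def goog_crc32c_header(content: bytes) -> str:
    checksum = crc32c.crc32c(content)  # unsigned 32-bit CRC32C of the payload
    encoded = base64.b64encode(struct.pack('>I', checksum)).decode()
    return f'crc32c={encoded}'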
calculate_hash(self, content: bytes) -> str
Purpose: Calculate SHA256 hash of content for file identification in sync protocol
Parameters:
content: Bytes content to hash
Returns: Hexadecimal string representation of SHA256 hash
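Because the pagedata component is a single newline, its hash is stable across runs:

>>> hashlib.sha256(b'\n').hexdigest()
'01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b'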
compare_with_real_app(self, our_requests: List[Dict]) -> Dict
Purpose: Compare proposed requests against real app logs and identify differences
Parameters:
our_requests: List of request dictionaries from simulate_pdf_upload()
Returns: Dictionary with keys: header_differences, sequence_differences, content_differences, critical_issues containing detailed comparison results
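The returned structure, shown with one illustrative entry (values are examples, not real capture data):

differences = {
    'header_differences': [
        {
            'step': 'metadata_upload',
            'header': 'rm-filename',
            'our_value': '3f1b9c4e-....metadata',
            'real_value': 'MISSING',
            'critical': False
        }
    ],
    'sequence_differences': [],
    'content_differences': [],
    'critical_issues': []  # filled in by analyze_critical_differences()
}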
compare_headers(self, our_req: Dict, real_req: Dict, differences: Dict)
Purpose: Compare HTTP headers between implementation and real app for a specific request
Parameters:
our_req: Dictionary representing our implementation's request
real_req: Dictionary representing the real app's request from logs
differences: Dictionary to append differences to (modified in place)
Returns: None. Modifies the differences dictionary in place by appending to header_differences list
analyze_critical_differences(self, differences: Dict)
Purpose: Analyze differences and highlight critical issues that could cause failures
Parameters:
differences: Dictionary containing all differences from comparison
Returns: None. Prints analysis to console and updates differences['critical_issues'] list
generate_fix_recommendations(self, differences: Dict) -> List[str]
Purpose: Generate actionable recommendations to fix identified differences
Parameters:
differences: Dictionary containing comparison results from compare_with_real_app()
Returns: List of string recommendations describing specific fixes needed (e.g., 'UPDATE: Change user-agent to: ...')
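Because the last three recommendations are unconditional in the source, even a comparison with no header differences yields:

recs = comparison.generate_fix_recommendations({'header_differences': []})
# ["UPDATE: Change metadata source from 'com.remarkable.windows' to 'com.remarkable.macos'",
#  "UPDATE: Change pagedata from empty string to '\\n' character",
#  "UPDATE: Set lastOpened to '0' consistently"]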
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
| base_dir | Path | Path object pointing to the directory containing the script, used for locating log files | instance |
| auth_session | requests.Session or similar | Authenticated session object from RemarkableAuth containing authorization headers and tokens | instance |
| real_app_logs | List[Dict] | List of parsed log entries from real app network captures, each containing request details like url, method, headers, and content | instance |
Dependencies
json, time, pathlib, typing, uuid, hashlib, base64, binascii
Required Imports
import json
import time
from pathlib import Path
from typing import Dict, Any, List
import uuid
import hashlib
import base64
import binascii
Conditional/Optional Imports
These imports are only needed under specific conditions:
from auth import RemarkableAuth
Condition: Required (conditional). Needed for authentication token generation and session management; auth.py must be available in the same directory or on the Python path.
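A defensive import pattern for callers (a sketch; the class itself imports RemarkableAuth unconditionally inside __init__):

try:
    from auth import RemarkableAuth
except ImportError as exc:
    raise RuntimeError(
        "auth.py providing RemarkableAuth must be importable "
        "(e.g. alongside dry_run_comparison.py)"
    ) from exc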
Usage Example
# Initialize the comparison tool
comparison = DryRunUploadComparison()

# Simulate a PDF upload with a custom document name
proposed_requests = comparison.simulate_pdf_upload(pdf_name="MyTestDocument")

# Compare against real app behavior
differences = comparison.compare_with_real_app(proposed_requests)

# Generate fix recommendations
recommendations = comparison.generate_fix_recommendations(differences)

# Review recommendations
for rec in recommendations:
    print(f"Fix needed: {rec}")

# Access specific difference categories
if differences['critical_issues']:
    print(f"Found {len(differences['critical_issues'])} critical issues")
    for issue in differences['critical_issues']:
        print(f"Issue in {issue['step']}: {issue['header']}")
Best Practices
- Instantiate the class only after ensuring real app logs are available in the expected directory structure
- Call simulate_pdf_upload() before compare_with_real_app() to generate the request sequence to compare
- Always review critical_issues in the differences dictionary as these indicate authentication or compatibility problems
- Use generate_fix_recommendations() to get actionable fixes rather than manually parsing differences
- The class performs file I/O during initialization, so handle potential IOError/OSError exceptions (see the sketch after this list)
- Real app logs must contain request content for meaningful header comparisons
- The class maintains state through instance attributes (real_app_logs, auth_session), so create new instances for independent test runs
- Hash calculations use SHA256 for file content identification matching reMarkable's sync protocol
- User-agent and authorization header mismatches are flagged as critical and should be fixed first
- The class does not make actual API calls, making it safe for testing without affecting cloud state
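A sketch of the defensive initialization mentioned above (hypothetical wrapper code, not part of the class):

try:
    comparison = DryRunUploadComparison()
except (IOError, OSError) as exc:
    # load_real_app_logs() swallows its own errors; failures here most likely
    # come from the auth/session setup or an unreadable base directory
    print(f"Initialization failed: {exc}")
    raise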
Similar Components
AI-powered semantic similarity - components with related functionality:
- function main_v77 (76.3% similar)
- function main_v15 (72.1% similar)
- class FixedUploadTest (71.5% similar)
- class SimplePDFUploadTest (71.0% similar)
- function test_quick_upload (68.1% similar)