class RemarkableLocalReplica
Builds and maintains a complete local replica of reMarkable cloud
/tf/active/vicechatdev/e-ink-llm/cloudtest/local_replica.py
Lines 93-618 | Complexity: moderate
Purpose
Builds and maintains a complete local replica of reMarkable cloud
Source Code
class RemarkableLocalReplica:
"""Builds and maintains a complete local replica of reMarkable cloud"""
def __init__(self, session: requests.Session, replica_dir: str = "remarkable_replica"):
self.session = session
self.base_url = "https://eu.tectonic.remarkable.com"
# Setup replica directory structure
self.replica_dir = Path(replica_dir).resolve()
self.content_dir = self.replica_dir / "content"
self.metadata_dir = self.replica_dir / "metadata"
self.raw_dir = self.replica_dir / "raw_components"
# Create directory structure
for directory in [self.replica_dir, self.content_dir, self.metadata_dir, self.raw_dir]:
directory.mkdir(parents=True, exist_ok=True)
# Metadata database
self.database_file = self.replica_dir / "replica_database.json"
self.sync_log_file = self.replica_dir / "sync_log.json"
# Setup logging
self.log_file = self.replica_dir / "replica_build.log"
self.setup_logging()
# State tracking
self.nodes: Dict[str, ReplicaNode] = {}
self.hierarchy: Dict[str, List[str]] = {} # parent_uuid -> [child_uuids]
self.failed_downloads: Set[str] = set()
# Statistics
self.stats = {
'total_nodes': 0,
'folders': 0,
'documents': 0,
'notebooks': 0,
'pdfs_extracted': 0,
'notebooks_extracted': 0,
'total_files': 0,
'total_size': 0,
'failed_extractions': 0
}
def setup_logging(self):
"""Setup comprehensive logging"""
self.logger = logging.getLogger('RemarkableReplica')
self.logger.setLevel(logging.DEBUG)
self.logger.handlers.clear()
# File handler
file_handler = logging.FileHandler(self.log_file, mode='w', encoding='utf-8')
file_handler.setLevel(logging.DEBUG)
file_formatter = logging.Formatter(
'%(asctime)s | %(levelname)-8s | %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
file_handler.setFormatter(file_formatter)
# Console handler
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
console_formatter = logging.Formatter('%(message)s')
console_handler.setFormatter(console_formatter)
self.logger.addHandler(file_handler)
self.logger.addHandler(console_handler)
self.logger.info(f"šļø REMARKABLE LOCAL REPLICA BUILDER STARTED")
self.logger.info(f"š Replica directory: {self.replica_dir}")
self.logger.info(f"š Build log: {self.log_file}")
def fetch_hash_content(self, hash_ref: str) -> Optional[Dict[str, Any]]:
"""Fetch content from reMarkable cloud by hash"""
try:
url = f"{self.base_url}/sync/v3/files/{hash_ref}"
self.logger.debug(f"FETCHING: {url}")
response = self.session.get(url)
response.raise_for_status()
content_type = response.headers.get('content-type', '')
content = response.content
self.logger.debug(f" Response: {len(content)} bytes, {content_type}")
return {
'hash': hash_ref,
'size': len(content),
'content': content,
'content_type': content_type
}
except Exception as e:
self.logger.error(f"Failed to fetch {hash_ref[:16]}...: {e}")
self.failed_downloads.add(hash_ref)
return None
def get_root_hash(self) -> Optional[str]:
"""Get the root hash from reMarkable cloud"""
try:
url = f"{self.base_url}/sync/v4/root"
response = self.session.get(url)
response.raise_for_status()
data = response.json()
root_hash = data.get('hash')
self.logger.info(f"š± Root hash: {root_hash}")
return root_hash
except Exception as e:
self.logger.error(f"Failed to get root hash: {e}")
return None
def parse_directory_listing(self, content: bytes) -> Dict[str, Any]:
"""Parse directory listing content"""
try:
text_content = content.decode('utf-8')
except UnicodeDecodeError:
self.logger.debug("Failed to decode as text, treating as binary")
return {'is_directory': False, 'child_objects': [], 'data_components': []}
result = {
'is_directory': False,
'child_objects': [],
'data_components': []
}
lines = text_content.split('\n')
if lines and lines[0].strip().isdigit():
lines = lines[1:] # Skip count line
import re
entry_pattern = r'^([a-f0-9]{64}):([0-9a-fA-F]+):([a-f0-9-]+(?:\.[^:]+)?):(\d+):(\d+)$'
for line in lines:
line = line.strip()
if not line:
continue
match = re.match(entry_pattern, line, re.IGNORECASE)
if match:
hash_val, flags, uuid_component, type_val, size_val = match.groups()
entry_info = {
'hash': hash_val,
'flags': flags,
'uuid_component': uuid_component,
'type': type_val,
'size': int(size_val)
}
if '.' in uuid_component:
# Data component
component_type = uuid_component.split('.')[-1]
entry_info['component_type'] = component_type
result['data_components'].append(entry_info)
else:
# Child object
result['child_objects'].append(entry_info)
result['is_directory'] = True
return result
def extract_metadata(self, hash_ref: str) -> Optional[Dict[str, Any]]:
"""Extract metadata from a metadata component"""
content_info = self.fetch_hash_content(hash_ref)
if not content_info:
return None
try:
text_content = content_info['content'].decode('utf-8')
return json.loads(text_content)
except (UnicodeDecodeError, json.JSONDecodeError) as e:
self.logger.debug(f"Failed to parse metadata: {e}")
return None
def save_raw_component(self, hash_ref: str, content: bytes, component_type: str) -> str:
"""Save raw component to disk and return the file path"""
filename = f"{hash_ref}_{component_type}"
filepath = self.raw_dir / filename
with open(filepath, 'wb') as f:
f.write(content)
return str(filepath)
def extract_pdf_from_component(self, pdf_hash: str, target_path: Path) -> bool:
"""Extract PDF from a PDF component"""
try:
content_info = self.fetch_hash_content(pdf_hash)
if not content_info:
return False
with open(target_path, 'wb') as f:
f.write(content_info['content'])
self.logger.debug(f" š Extracted PDF: {target_path}")
return True
except Exception as e:
self.logger.error(f"Failed to extract PDF {pdf_hash[:16]}...: {e}")
return False
def extract_notebook_components(self, node: ReplicaNode, components: Dict[str, str]) -> List[str]:
"""Extract reMarkable notebook components"""
extracted_files = []
# Create notebook directory
notebook_dir = Path(node.local_path).parent / f"{Path(node.local_path).stem}_notebook"
notebook_dir.mkdir(exist_ok=True)
for component_type, hash_ref in components.items():
if not hash_ref:
continue
try:
content_info = self.fetch_hash_content(hash_ref)
if not content_info:
continue
if component_type == 'content':
# Save content file
content_file = notebook_dir / "content"
with open(content_file, 'wb') as f:
f.write(content_info['content'])
extracted_files.append(str(content_file))
elif component_type == 'metadata':
# Already processed for main metadata
pass
elif component_type == 'pagedata':
# Save pagedata
pagedata_file = notebook_dir / "pagedata"
with open(pagedata_file, 'wb') as f:
f.write(content_info['content'])
extracted_files.append(str(pagedata_file))
elif component_type.endswith('.rm'):
# Save .rm files (stroke data)
rm_file = notebook_dir / component_type.split('/')[-1]
with open(rm_file, 'wb') as f:
f.write(content_info['content'])
extracted_files.append(str(rm_file))
node.rm_files.append(str(rm_file))
except Exception as e:
self.logger.error(f"Failed to extract {component_type}: {e}")
return extracted_files
def build_node_path(self, node_uuid: str, name: str, parent_path: str = "") -> str:
"""Build the local file system path for a node"""
if parent_path:
base_path = Path(parent_path)
else:
base_path = self.content_dir
# Sanitize filename
safe_name = "".join(c for c in name if c.isalnum() or c in (' ', '-', '_', '.')).rstrip()
if not safe_name:
safe_name = f"unnamed_{node_uuid[:8]}"
return str(base_path / safe_name)
def discover_node(self, hash_ref: str, parent_uuid: str = None, parent_path: str = "", depth: int = 0) -> Optional[ReplicaNode]:
"""Discover and process a single node"""
if hash_ref in self.failed_downloads:
return None
self.logger.debug(f"{' ' * depth}š DISCOVERING: {hash_ref[:16]}... (depth {depth})")
# Fetch content
content_info = self.fetch_hash_content(hash_ref)
if not content_info:
return None
# Parse content
parsed = self.parse_directory_listing(content_info['content'])
# Find metadata component and extract metadata
metadata = {}
metadata_hash = None
node_name = f"unknown_{hash_ref[:8]}"
node_type = "folder"
metadata_parent_uuid = None
for component in parsed['data_components']:
if component['component_type'] == 'metadata':
metadata_hash = component['hash']
extracted_metadata = self.extract_metadata(metadata_hash)
if extracted_metadata:
metadata = extracted_metadata
node_name = metadata.get('visibleName', node_name)
if metadata.get('type') == 'DocumentType':
node_type = "document"
elif metadata.get('type') == 'CollectionType':
node_type = "folder"
# Use the parent from metadata (this is the TRUE parent)
metadata_parent_uuid = metadata.get('parent', '') or None
break
# Determine UUID from metadata or fallback
node_uuid = None
for component in parsed['child_objects']:
node_uuid = component['uuid_component']
break
if not node_uuid and parsed['data_components']:
# Extract UUID from component name
component_name = parsed['data_components'][0]['uuid_component']
if '.' in component_name:
node_uuid = component_name.split('.')[0]
if not node_uuid:
node_uuid = hash_ref[:32] # Fallback
# Build proper local path using metadata parent UUID
actual_parent_uuid = metadata_parent_uuid or parent_uuid
# Find the actual parent's local path
if actual_parent_uuid and actual_parent_uuid in self.nodes:
parent_node = self.nodes[actual_parent_uuid]
actual_parent_path = parent_node.local_path
else:
actual_parent_path = str(self.content_dir)
local_path = self.build_node_path(node_uuid, node_name, actual_parent_path)
# Create replica node
node = ReplicaNode(
uuid=node_uuid,
hash=hash_ref,
name=node_name,
node_type=node_type,
parent_uuid=actual_parent_uuid,
parent_path=actual_parent_path,
local_path=local_path,
depth=depth,
sync_timestamp=datetime.now().isoformat(),
sync_hash=hash_ref
)
# Apply metadata fields safely
for key, value in metadata.items():
if hasattr(node, key):
setattr(node, key, value)
# Extract component hashes and collect .rm files
rm_file_info = [] # Store info about .rm files
for component in parsed['data_components']:
comp_type = component['component_type']
comp_hash = component['hash']
if comp_type == 'content':
node.content_hash = comp_hash
elif comp_type == 'metadata':
node.metadata_hash = comp_hash
elif comp_type == 'pdf':
node.pdf_hash = comp_hash
elif comp_type == 'pagedata':
node.pagedata_hash = comp_hash
elif comp_type.endswith('.rm'):
# Store .rm file info for extraction
rm_file_info.append({
'hash': comp_hash,
'filename': comp_type.split('/')[-1] # Extract just the filename
})
# Create local directory/file structure
if node_type == "folder":
Path(local_path).mkdir(parents=True, exist_ok=True)
self.stats['folders'] += 1
else:
Path(local_path).parent.mkdir(parents=True, exist_ok=True)
# Extract PDF content if available
if node.pdf_hash:
pdf_path = Path(local_path).with_suffix('.pdf')
if self.extract_pdf_from_component(node.pdf_hash, pdf_path):
node.extracted_files.append(str(pdf_path))
self.stats['pdfs_extracted'] += 1
# Extract .rm files if available
if rm_file_info:
notebook_dir = Path(local_path).parent / f"{Path(local_path).stem}_notebook"
notebook_dir.mkdir(exist_ok=True)
for rm_info in rm_file_info:
rm_path = notebook_dir / rm_info['filename']
if self.extract_rm_file(rm_info['hash'], rm_path):
node.rm_files.append(str(rm_path))
node.extracted_files.append(str(rm_path))
# Extract other notebook components
notebook_components = {}
for component in parsed['data_components']:
comp_type = component['component_type']
if comp_type in ['content', 'pagedata']:
notebook_components[comp_type] = component['hash']
if notebook_components:
extracted = self.extract_notebook_components(node, notebook_components)
node.extracted_files.extend(extracted)
if extracted:
self.stats['notebooks_extracted'] += 1
if node_type == "document":
self.stats['documents'] += 1
else:
self.stats['notebooks'] += 1
# Store node BEFORE processing children (so children can find their parents)
self.nodes[node_uuid] = node
self.stats['total_nodes'] += 1
# Track hierarchy
if actual_parent_uuid:
if actual_parent_uuid not in self.hierarchy:
self.hierarchy[actual_parent_uuid] = []
self.hierarchy[actual_parent_uuid].append(node_uuid)
self.logger.info(f"{' ' * depth}ā
{node_type}: {node_name} | {len(parsed['child_objects'])} children")
# Recursively discover children
for child_obj in parsed['child_objects']:
child_hash = child_obj['hash']
self.discover_node(child_hash, node_uuid, local_path, depth + 1)
return node
def extract_rm_file(self, rm_hash: str, target_path: Path) -> bool:
"""Extract .rm file from reMarkable cloud"""
try:
content_info = self.fetch_hash_content(rm_hash)
if not content_info:
return False
with open(target_path, 'wb') as f:
f.write(content_info['content'])
self.logger.debug(f" šļø Extracted .rm file: {target_path}")
return True
except Exception as e:
self.logger.error(f"Failed to extract .rm file {rm_hash[:16]}...: {e}")
return False
def save_database(self):
"""Save the complete metadata database"""
database = {
'replica_info': {
'created': datetime.now().isoformat(),
'replica_dir': str(self.replica_dir),
'total_nodes': len(self.nodes),
'statistics': self.stats
},
'nodes': {uuid: asdict(node) for uuid, node in self.nodes.items()},
'hierarchy': self.hierarchy,
'failed_downloads': list(self.failed_downloads)
}
with open(self.database_file, 'w', encoding='utf-8') as f:
json.dump(database, f, indent=2, ensure_ascii=False)
self.logger.info(f"š¾ Database saved: {self.database_file}")
def save_sync_log(self):
"""Save sync log for future incremental updates"""
sync_log = {
'last_sync': datetime.now().isoformat(),
'root_hash': getattr(self, 'root_hash', ''),
'nodes_synced': len(self.nodes),
'sync_hashes': {uuid: node.sync_hash for uuid, node in self.nodes.items()},
'statistics': self.stats
}
with open(self.sync_log_file, 'w', encoding='utf-8') as f:
json.dump(sync_log, f, indent=2)
self.logger.info(f"š Sync log saved: {self.sync_log_file}")
def build_complete_replica(self) -> bool:
"""Build the complete local replica"""
self.logger.info(f"š STARTING COMPLETE REPLICA BUILD")
# Get root hash
root_hash = self.get_root_hash()
if not root_hash:
self.logger.error("ā Failed to get root hash")
return False
self.root_hash = root_hash
# Discover from root
self.logger.info(f"š Starting discovery from root: {root_hash}")
root_node = self.discover_node(root_hash)
if not root_node:
self.logger.error("ā Failed to discover root node")
return False
# Calculate final statistics
self.stats['total_files'] = sum(len(node.extracted_files) for node in self.nodes.values())
# Save database and sync log
self.save_database()
self.save_sync_log()
# Final report
self.logger.info(f"\nš REPLICA BUILD COMPLETED!")
self.logger.info(f"š FINAL STATISTICS:")
self.logger.info(f" š Total nodes: {self.stats['total_nodes']}")
self.logger.info(f" š Folders: {self.stats['folders']}")
self.logger.info(f" š Documents: {self.stats['documents']}")
self.logger.info(f" š Notebooks: {self.stats['notebooks']}")
self.logger.info(f" š PDFs extracted: {self.stats['pdfs_extracted']}")
self.logger.info(f" š Notebooks extracted: {self.stats['notebooks_extracted']}")
self.logger.info(f" š Total files: {self.stats['total_files']}")
self.logger.info(f" ā Failed downloads: {len(self.failed_downloads)}")
self.logger.info(f"\nš Replica location: {self.replica_dir}")
self.logger.info(f"š¾ Database: {self.database_file}")
self.logger.info(f"š Sync log: {self.sync_log_file}")
return True
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| session | requests.Session | - | positional |
| replica_dir | str | "remarkable_replica" | positional |
Parameter Details
session: Authenticated requests.Session used for all reMarkable cloud requests
replica_dir: Directory in which the replica is built; created if it does not exist
Return Value
Instantiation returns a RemarkableLocalReplica instance
Class Interface
Methods
__init__(self, session, replica_dir)
Purpose: Initialize the replica builder: directory structure, database paths, logging, and state tracking
Parameters:
session: Type: requests.Session
replica_dir: Type: str (default "remarkable_replica")
Returns: None
setup_logging(self)
Purpose: Setup comprehensive logging
Returns: None
fetch_hash_content(self, hash_ref) -> Optional[Dict[str, Any]]
Purpose: Fetch content from reMarkable cloud by hash
Parameters:
hash_ref: Type: str
Returns: Optional[Dict[str, Any]]
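A minimal sketch of calling this method directly, assuming an already-authenticated requests.Session (how the session is authenticated is outside this class; the token and hash below are placeholders):

import requests

session = requests.Session()
session.headers["Authorization"] = "Bearer <user-token>"  # placeholder credential

replica = RemarkableLocalReplica(session, replica_dir="remarkable_replica")
info = replica.fetch_hash_content("a" * 64)  # placeholder 64-char content hash
if info:
    print(f"{info['size']} bytes, content-type: {info['content_type']}")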
get_root_hash(self) -> Optional[str]
Purpose: Get the root hash from reMarkable cloud
Returns: Optional[str]
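The root hash is the entry point for the whole tree: every other node is reached by following hashes from its listing. A sketch, reusing the replica instance from the fetch_hash_content example above:

root_hash = replica.get_root_hash()  # GET /sync/v4/root
if root_hash:
    root_listing = replica.fetch_hash_content(root_hash)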
parse_directory_listing(self, content) -> Dict[str, Any]
Purpose: Parse directory listing content
Parameters:
content: Type: bytes
Returns: Dict[str, Any]
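Each listing line has the form hash:flags:uuid_component:type:size. Entries whose uuid_component contains a dot (e.g. <uuid>.metadata) are data components of the current node; bare-UUID entries are child objects. A worked example with fabricated hashes:

listing = (
    "3\n"  # count line, skipped by the parser
    + "a" * 64 + ":0:0d5e1c3f-1111-2222-3333-444455556666.metadata:0:120\n"
    + "b" * 64 + ":0:0d5e1c3f-1111-2222-3333-444455556666.content:0:2048\n"
    + "c" * 64 + ":80000000:9f8e7d6c-aaaa-bbbb-cccc-ddddeeeeffff:0:0\n"
).encode("utf-8")

parsed = replica.parse_directory_listing(listing)
assert parsed["is_directory"]
assert len(parsed["data_components"]) == 2   # the .metadata and .content entries
assert len(parsed["child_objects"]) == 1     # the bare-UUID entry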
extract_metadata(self, hash_ref) -> Optional[Dict[str, Any]]
Purpose: Extract metadata from a metadata component
Parameters:
hash_ref: Type: str
Returns: Optional[Dict[str, Any]]
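The metadata component is a small JSON document carrying visibleName, type (DocumentType or CollectionType), and parent; a sketch with a placeholder hash:

meta = replica.extract_metadata("b" * 64)  # placeholder metadata-component hash
if meta:
    print(meta.get("visibleName"), meta.get("type"), meta.get("parent"))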
save_raw_component(self, hash_ref, content, component_type) -> str
Purpose: Save raw component to disk and return the file path
Parameters:
hash_ref: Type: str
content: Type: bytes
component_type: Type: str
Returns: str
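The raw file is named <hash>_<component_type> inside raw_components/; a one-line sketch with placeholder values:

saved_path = replica.save_raw_component("e" * 64, b"\x00\x01", "pagedata")
print(saved_path)  # .../raw_components/eee..._pagedata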
extract_pdf_from_component(self, pdf_hash, target_path) -> bool
Purpose: Extract PDF from a PDF component
Parameters:
pdf_hash: Type: str
target_path: Type: Path
Returns: bool
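Note that the method writes to target_path directly without creating parent directories, so the caller must ensure they exist. A sketch with a placeholder hash:

from pathlib import Path

target = Path("exports") / "document.pdf"
target.parent.mkdir(parents=True, exist_ok=True)  # parent must exist beforehand
if replica.extract_pdf_from_component("c" * 64, target):
    print("PDF written to", target)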
extract_notebook_components(self, node, components) -> List[str]
Purpose: Extract reMarkable notebook components
Parameters:
node: Type: ReplicaNode
components: Type: Dict[str, str]
Returns: List[str]
build_node_path(self, node_uuid, name, parent_path) -> str
Purpose: Build the local file system path for a node
Parameters:
node_uuid: Type: str
name: Type: str
parent_path: Type: str
Returns: str
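Names are sanitized to alphanumerics plus space, dash, underscore, and dot; everything else is dropped, and an empty result falls back to unnamed_<uuid-prefix>. A quick illustration:

path = replica.build_node_path(
    "0d5e1c3f-1111-2222-3333-444455556666",  # placeholder UUID
    'Meeting Notes: Q3/Q4 "Draft"',
)
# Colon, slashes, and quotes are stripped:
# .../content/Meeting Notes Q3Q4 Draft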
discover_node(self, hash_ref, parent_uuid, parent_path, depth) -> Optional[ReplicaNode]
Purpose: Discover and process a single node
Parameters:
hash_ref: Type: str
parent_uuid: Type: str
parent_path: Type: str
depth: Type: int
Returns: Optional[ReplicaNode]
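This method is the recursive core of the builder: it fetches the listing for hash_ref, reads the metadata component to resolve name, type, and true parent, writes local files, registers the node, and then recurses into each child object. Calling it on the root hash walks the entire tree:

root_hash = replica.get_root_hash()
if root_hash:
    root_node = replica.discover_node(root_hash)
    print(replica.stats["total_nodes"], "nodes discovered")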
extract_rm_file(self, rm_hash, target_path) -> bool
Purpose: Extract .rm file from reMarkable cloud
Parameters:
rm_hash: Type: str
target_path: Type: Path
Returns: bool
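Like extract_pdf_from_component, this writes to target_path directly, so the target directory must already exist; a sketch with placeholder values:

from pathlib import Path

notebook_dir = Path("My Notebook_notebook")
notebook_dir.mkdir(exist_ok=True)
replica.extract_rm_file("d" * 64, notebook_dir / "page1.rm")  # placeholder hash and page name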
save_database(self)
Purpose: Save the complete metadata database
Returns: None
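The database is a single JSON file with replica_info, nodes (serialized ReplicaNode records), hierarchy, and failed_downloads keys; a sketch of reading it back after a build has run:

import json

with open(replica.database_file, encoding="utf-8") as f:
    db = json.load(f)
print(db["replica_info"]["total_nodes"], "nodes in database")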
save_sync_log(self)
Purpose: Save sync log for future incremental updates
Returns: None
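The sync log records the per-node sync_hash values, which is what makes later incremental updates possible: a node whose current hash matches the logged one has not changed. The comparison below is an assumption about how a follow-up sync might use the log, not logic contained in this class:

import json

with open(replica.sync_log_file, encoding="utf-8") as f:
    log = json.load(f)

changed = {
    uuid for uuid, node in replica.nodes.items()
    if log["sync_hashes"].get(uuid) != node.sync_hash
}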
build_complete_replica(self) -> bool
Purpose: Build the complete local replica
Returns: bool
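An end-to-end sketch; obtaining the bearer token is outside this class, so the credential below is a placeholder:

import requests

session = requests.Session()
session.headers["Authorization"] = "Bearer <user-token>"  # placeholder credential

replica = RemarkableLocalReplica(session, replica_dir="remarkable_replica")
if replica.build_complete_replica():
    print("Replica built at", replica.replica_dir)
else:
    print("Build failed; see", replica.log_file)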
Required Imports
import json
import logging
import re
import requests
from dataclasses import asdict
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Set
# plus the ReplicaNode dataclass defined in the same module
Usage Example
# Example usage (requires an authenticated requests.Session):
# replica = RemarkableLocalReplica(session, replica_dir="remarkable_replica")
# replica.build_complete_replica()
Similar Components
AI-powered semantic similarity - components with related functionality:
- class RemarkableReplicaSync (78.0% similar)
- function test_complete_replica_build (71.9% similar)
- class ReplicaNode (65.4% similar)
- class RemarkableReplicaBuilder (64.4% similar)
- class RemarkableReplicaSync_v1 (64.2% similar)