class ReferenceManager_v1
Manages document references for inline citation and bibliography generation, tracking documents and generating formatted citations and bibliographies.
/tf/active/vicechatdev/improved_project_victoria_generator.py
48 - 145
simple
Purpose
The ReferenceManager class provides a centralized system for managing document references in a citation system. It assigns unique reference numbers to documents, tracks which documents have been referenced, generates inline citations (e.g., [1], [2]), and produces formatted bibliographies with source information and content previews. This is useful for RAG systems, document processing pipelines, or any application requiring academic-style citation management.
Source Code
class ReferenceManager:
    """
    Manages document references for inline citation and bibliography generation.
    Similar to the system used in datacapture.py.

    Every distinct ``doc_id`` receives a sequential reference number (starting
    at 1) the first time it is added. That number is used for inline citations
    such as ``[1]`` and for the entries of the generated bibliography.
    """

    # Maximum number of content characters kept in a bibliography preview.
    PREVIEW_LENGTH = 300

    def __init__(self):
        self.references = {}  # Map reference_id -> document info
        self.reference_counter = 1
        # Track which documents have been referenced.
        # NOTE: no method of this class adds to this set; it is only reset.
        self.used_documents = set()

    def _find_reference_number(self, doc_id: str):
        """Return the reference number registered for ``doc_id``, or None."""
        # Scan the stored entries (rather than a side index) so that entries
        # placed into ``self.references`` by other code are still found.
        for ref_num, ref_info in self.references.items():
            if ref_info['doc_id'] == doc_id:
                return ref_num
        return None

    def add_document(self, doc_id: str, content: str, metadata: Dict = None) -> int:
        """
        Add a document to the reference system and return its reference number.

        If ``doc_id`` was added before, the existing number is returned and
        the stored content/metadata are left unchanged.

        Args:
            doc_id: Unique document identifier
            content: Document content; ``None`` is treated as empty text
            metadata: Document metadata

        Returns:
            Reference number for inline citation
        """
        existing = self._find_reference_number(doc_id)
        if existing is not None:
            return existing

        metadata = metadata or {}
        # A missing body must not break slicing/len() when building the preview.
        if content is None:
            content = ""

        limit = self.PREVIEW_LENGTH
        if len(content) > limit:
            preview = content[:limit] + "..."
        else:
            preview = content

        ref_num = self.reference_counter
        self.references[ref_num] = {
            'doc_id': doc_id,
            'content': content,
            'metadata': metadata,
            'source': self.extract_source_info(metadata),
            'preview': preview,
        }
        self.reference_counter += 1
        return ref_num

    def extract_source_info(self, metadata: Dict) -> str:
        """
        Extract readable source information from metadata.

        Args:
            metadata: Document metadata

        Returns:
            The first non-empty value among the known source fields, otherwise
            a "<type> (ID: <id>)" fallback, or "Unknown source" when the
            metadata is empty.
        """
        if not metadata:
            return "Unknown source"

        # Try various metadata fields for source information, in priority order
        source_fields = ['source', 'title', 'filename', 'document_title', 'file_path']
        for field in source_fields:
            value = metadata.get(field)
            if value:
                return str(value)

        # Fallback to document type or generic identifier
        doc_type = metadata.get('type', metadata.get('document_type', 'Document'))
        return f"{doc_type} (ID: {metadata.get('id', 'unknown')})"

    def get_citation(self, doc_id: str) -> str:
        """
        Get inline citation for a document.

        Args:
            doc_id: Document identifier

        Returns:
            Inline citation string like [1], or "[?]" when the document has
            not been added
        """
        ref_num = self._find_reference_number(doc_id)
        if ref_num is None:
            return "[?]"  # Should not happen if add_document was called first
        return f"[{ref_num}]"

    def generate_bibliography(self) -> str:
        """
        Generate a formatted bibliography section.

        Returns:
            Formatted bibliography in markdown, with entries ordered by
            reference number
        """
        if not self.references:
            return "\n## References\n\nNo references available.\n"

        bibliography = ["\n## References\n"]
        for ref_num in sorted(self.references):
            ref_info = self.references[ref_num]
            bibliography.append(f"**[{ref_num}]** {ref_info['source']}")
            bibliography.append(f" *Content preview*: {ref_info['preview']}")
            bibliography.append("")  # Empty line between entries
        return "\n".join(bibliography)

    def clear(self):
        """Clear all references and restart numbering at 1."""
        self.references = {}
        self.reference_counter = 1
        self.used_documents = set()
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| bases | - | - |
Parameter Details
__init__: No parameters required. Initializes an empty reference management system with internal state tracking.
Return Value
Instantiation returns a ReferenceManager object. Key method returns: add_document() returns an integer reference number; get_citation() returns a string citation like '[1]'; generate_bibliography() returns a markdown-formatted string containing all references; clear() returns None.
Class Interface
Methods
__init__(self)
Purpose: Initialize the ReferenceManager with empty state
Returns: None - initializes instance attributes
add_document(self, doc_id: str, content: str, metadata: Dict = None) -> int
Purpose: Add a document to the reference system and return its reference number, or return existing reference number if document already added
Parameters:
doc_id: Unique document identifier string used to track and deduplicate documents
content: Full text content of the document to be referenced
metadata: Optional dictionary containing document metadata (source, title, filename, etc.)
Returns: Integer reference number (starting from 1) that can be used for inline citations
extract_source_info(self, metadata: Dict) -> str
Purpose: Extract human-readable source information from metadata dictionary by checking common metadata fields
Parameters:
metadata: Dictionary containing document metadata to extract source information from
Returns: String containing the best available source information (source name, title, filename, etc.) or a fallback description
get_citation(self, doc_id: str) -> str
Purpose: Get the inline citation string for a previously added document
Parameters:
doc_id: Document identifier for which to retrieve the citation
Returns: String in format '[N]' where N is the reference number, or '[?]' if document not found
generate_bibliography(self) -> str
Purpose: Generate a complete formatted bibliography section with all referenced documents
Returns: Markdown-formatted string containing a '## References' section with numbered entries, source information, and content previews
clear(self)
Purpose: Reset the reference manager to initial state, clearing all references and resetting the counter
Returns: None - modifies instance state in place
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
| references | Dict[int, Dict[str, Any]] | Dictionary mapping reference numbers (int) to document information dictionaries containing doc_id, content, metadata, source, and preview | instance |
| reference_counter | int | Counter tracking the next reference number to assign, starts at 1 and increments with each new document | instance |
| used_documents | set | Set intended to track which documents have been referenced (initialized but not actively used in current implementation) | instance |
Dependencies
typing
Required Imports
from typing import Dict
Usage Example
from typing import Dict

# Start with an empty manager; reference numbering begins at 1
ref_manager = ReferenceManager()

# Register the first document; the returned number identifies it in citations
first_metadata = {'source': 'Research Paper A', 'author': 'John Doe'}
ref_num1 = ref_manager.add_document(
    doc_id='doc_001',
    content='This is the content of the first document with important information.',
    metadata=first_metadata,
)

# Register a second document, described by a title instead of a source
second_metadata = {'title': 'Technical Report B', 'year': 2023}
ref_num2 = ref_manager.add_document(
    doc_id='doc_002',
    content='Another document with different content that will be referenced.',
    metadata=second_metadata,
)

# Look up the inline markers for both documents
citation1 = ref_manager.get_citation('doc_001')  # Returns '[1]'
citation2 = ref_manager.get_citation('doc_002')  # Returns '[2]'

# Embed the markers in the generated text
text = f'According to the research {citation1}, we can see that {citation2}...'

# Render the markdown "References" section and show it
bibliography = ref_manager.generate_bibliography()
print(bibliography)

# Reset the manager before starting another document
ref_manager.clear()
Best Practices
- Always call add_document() before get_citation() to ensure the document is registered in the system
- Use the same doc_id consistently for the same document to avoid duplicate references
- Call generate_bibliography() at the end of document generation to append all references
- Use clear() when starting a new document or session to reset the reference counter
- Provide meaningful metadata (especially 'source', 'title', or 'filename') for better bibliography formatting
- The reference counter is sequential and starts at 1, incrementing with each new document
- Documents are deduplicated by doc_id - adding the same doc_id twice returns the same reference number
- The used_documents set is initialized but not actively used in the current implementation
- Content previews are automatically truncated to 300 characters for bibliography display
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
- class ReferenceManager 94.7% similar
- class ReferenceManager_v3 73.8% similar
- class ReferenceManager_v4 73.2% similar
- class ReferenceManager_v2 72.9% similar
- function parse_references_section 53.6% similar