ReferenceManager_v4 - Code Extractor

class ReferenceManager_v4

Maturity: 28

Manages extraction and formatting of references for LLM chat responses. Handles both file references and BibTeX citations, formatting them according to various academic citation styles.

File:
/tf/active/vicechatdev/OneCo_hybrid_RAG.py

Lines:
289 - 967

Complexity:
moderate

Purpose

Manages extraction and formatting of references for LLM chat responses. Handles both file references and BibTeX citations, formatting them according to various academic citation styles.

Source Code

class ReferenceManager:
    """
    Manages extraction and formatting of references for LLM chat responses.
    Handles both file references and BibTeX citations, formatting them according
    to various academic citation styles.
    """
    
    def __init__(self, default_style="apa"):
        """
        Initialize the reference manager.
        
        Args:
            default_style: Default citation style to use
        """
        self.default_style = default_style
        self.style_titles = {
            "apa": "References",
            "mla": "Works Cited",
            "chicago": "Bibliography",
            "ieee": "References",
            "harvard1": "References",
            "vancouver": "References",
            "nature": "References"
        }

    
    
    def extract_references(self, text):
        """
        Extract block references from text in various formats including:
        - Single blocks: [block 1] or [Block 1]
        - Multiple blocks: [Block 1, Block 2, Block 3] or [Block 1, 2, 3]
        
        Args:
            text: Text to extract references from
            
        Returns:
            list: List of block numbers referenced in the text
        """
        # Regex patterns to find different reference formats
        # Single block pattern: [block 1] or [Block 1]
        single_pattern = r'\[(?:[Bb]lock)\s+(\d+)\]'
        # Multiple blocks pattern: [Block 1, Block 2, Block 3]
        multi_block_pattern = r'\[(?:[Bb]lock\s+\d+(?:,\s*[Bb]lock\s+\d+)+)\]'
        # Multiple blocks pattern with abbreviated format: [Block 1, 2, 3]
        abbreviated_pattern = r'\[(?:[Bb]lock\s+\d+(?:,\s*\d+)+)\]'
        
        # Find all patterns and extract block numbers
        unique_refs = []
        
        # Extract single block references
        single_refs = re.findall(single_pattern, text)
        for ref in single_refs:
            block_num = int(ref)
            if block_num not in unique_refs:
                unique_refs.append(block_num)
        
        # Extract multiple block references with full "Block X" format
        multi_blocks = re.findall(multi_block_pattern, text)
        for block_group in multi_blocks:
            # Remove outer brackets
            clean_group = block_group[1:-1]
            # Find all block numbers in this group
            block_nums = re.findall(r'[Bb]lock\s+(\d+)', clean_group)
            for num in block_nums:
                block_num = int(num)
                if block_num not in unique_refs:
                    unique_refs.append(block_num)
        
        # Extract multiple block references with abbreviated format
        abbreviated_blocks = re.findall(abbreviated_pattern, text)
        for block_group in abbreviated_blocks:
            # Remove outer brackets
            clean_group = block_group[1:-1]
            # Find the first block number (with "Block" prefix)
            first_block_match = re.search(r'[Bb]lock\s+(\d+)', clean_group)
            if first_block_match:
                first_block = int(first_block_match.group(1))
                if first_block not in unique_refs:
                    unique_refs.append(first_block)
                
                # Find all additional numbers in this group
                additional_nums = re.findall(r',\s*(\d+)', clean_group)
                for num in additional_nums:
                    block_num = int(num)
                    if block_num not in unique_refs:
                        unique_refs.append(block_num)
        
        return unique_refs
    
    def process_references(self, text, blocks_dict, style=None):
        """
        Process references in text, lookup block information, and generate reference section.
        An improved approach to handle complex reference patterns reliably including ranges.
        
        Args:
            text: Text containing references
            blocks_dict: Dictionary with block data indexed by block number
            style: Citation style to use (defaults to self.default_style)
            
        Returns:
            tuple: (updated_text, references_section)
        """
        # Use default style if none specified
        if style is None:
            style = self.default_style
        
        # STEP 1: Find all block references in the text with enhanced pattern matching
        # This pattern also captures ranges with hyphens (e.g., [Blocks 2-11])
        all_refs_pattern = r'\[((?:[Bb]locks?\s+\d+(?:\s*[,-]\s*(?:[Bb]locks?\s*)?\d+)*)|(?:[Bb]locks?\s+\d+))\]'
        ref_matches = re.findall(all_refs_pattern, text)
        print(f"DEBUG: Found reference matches: {ref_matches}")
        
        # Create a mapping of original reference text to its position
        original_refs = {}
        for match in re.finditer(all_refs_pattern, text):
            original_refs[match.group(0)] = match.span()
        
        print(f"DEBUG: Original refs dict: {list(original_refs.keys())}")
        print(f"DEBUG: Available blocks in dict: {list(blocks_dict.keys())}")
        
        # STEP 2: Extract block numbers from each reference
        reference_catalog = {}
        for ref_text in ref_matches:
            # Handle ranges with hyphens (e.g., "Blocks 2-11")
            expanded_numbers = []
            
            # First check for ranges with hyphens
            range_matches = re.findall(r'(\d+)\s*-\s*(\d+)', ref_text)
            for start, end in range_matches:
                # Convert to integers and expand the range
                start_num, end_num = int(start), int(end)
                expanded_numbers.extend(range(start_num, end_num + 1))
            
            # Then add any individual numbers not in ranges
            # Replace ranges first to avoid counting them twice
            cleaned_text = re.sub(r'\d+\s*-\s*\d+', '', ref_text)
            individual_numbers = [int(num) for num in re.findall(r'\d+', cleaned_text)]
            expanded_numbers.extend(individual_numbers)
            
            # Remove duplicates and store in catalog
            block_numbers = sorted(list(set(expanded_numbers)))
            reference_catalog[f"[{ref_text}]"] = block_numbers
        
        # Collect all unique block numbers across all references
        all_block_numbers = []
        for numbers in reference_catalog.values():
            for num in numbers:
                if num not in all_block_numbers:
                    all_block_numbers.append(num)
        
        # STEP 3: Create references for all found block numbers
        references = []
        citation_map = {}
        
        for i, block_num in enumerate(all_block_numbers, 1):
            if block_num not in blocks_dict:
                print(f"Warning: Block {block_num} referenced but not found in blocks_dict")
                continue
                
            block_data = blocks_dict[block_num]
            block_type = block_data.get("type", "unknown")
            ref_id = str(i)
            
            # Create reference object based on block type
            if block_type == "document":
                # Document/file reference
                ref_obj = {
                    "id": ref_id,
                    "type": "file",
                    "path": block_data.get("path", ""),
                    "description": block_data.get("description", None)
                }
            elif block_type == "literature":
                # BibTeX reference
                ref_obj = {
                    "id": ref_id,
                    "type": "bibtex",
                    "content": block_data.get("bibtex", "")
                }
            elif block_type == "web":
                # Web reference
                ref_obj = {
                    "id": ref_id,
                    "type": "web",
                    "url": block_data.get("url", ""),
                    "title": block_data.get("title", "Web Page"),
                    "snippet": block_data.get("snippet", ""),
                    "date": block_data.get("date", "")
                }
            else:
                # Generic reference (fallback)
                ref_obj = {
                    "id": ref_id,
                    "type": "generic",
                    "content": block_data.get("content", "")
                }
                
            references.append(ref_obj)
            citation_map[block_num] = ref_id
        
        # Generate reference section
        print("citation map", citation_map)
        references_section, _ = self.generate_references_section(references, style)
        
        # STEP 4: Replace all original references with their numerical equivalents
        updated_text = text
        
        # Debug print to see what we're working with
        print(f"Original refs to process: {len(original_refs)}")
        
        # Sort references by length (longest first) to avoid partial replacements
        sorted_refs = sorted(original_refs.keys(), key=len, reverse=True)
        
        for original_ref in sorted_refs:
            # Get block numbers for this reference
            block_numbers = reference_catalog.get(original_ref, [])
            
            # Skip if no block numbers found (shouldn't happen with our regex)
            if not block_numbers:
                continue
                
            # Create a list of citation IDs for the block numbers
            ref_ids = []
            for block_num in block_numbers:
                if block_num in citation_map:
                    ref_ids.append(citation_map[block_num])
            
            # Only proceed if we have valid reference IDs
            if ref_ids:
                # Sort the reference IDs numerically before joining them
                # This ensures they appear in increasing order: [1, 2, 3] instead of [3, 1, 2]
                sorted_ref_ids = sorted(ref_ids, key=int)
                
                # Create the new reference string with comma-delimited ref_ids
                # Always using comma format even if original was a range
                new_ref = f"[{', '.join(sorted_ref_ids)}]"
                
                # Replace the entire original reference with the new formatted one
                updated_text = updated_text.replace(original_ref, new_ref)
                
                # Debug prints to see what's happening
                print(f"Replacing: {original_ref} -> {new_ref}")
        
        return updated_text, references_section
    
    def format_bibtex_reference(self, bibtex_string, ref_id, style_name="apa"):
        """
        Format the first entry of a BibTeX string as a numbered Markdown reference.

        Articles are laid out per style (apa, ieee, chicago, nature, harvard1,
        vancouver, anything else as a plain layout). Books, incollection and
        inbook entries have apa, ieee and chicago layouts plus a plain
        default. Every other entry type uses "Authors (Year). Title.".
        Fields missing from the entry are inserted as empty text. A DOI, when
        present and not already in the reference, is appended as a Markdown
        link.

        If the string contains no BibTeX entry at all, the stripped input (or
        the word "Reference" when it is blank) is used as the reference text
        instead of failing.

        Args:
            bibtex_string: BibTeX entry as a string
            ref_id: Reference ID for citation
            style_name: Style name (apa, ieee, etc.), case-insensitive

        Returns:
            tuple: (citation_marker, reference_entry)
        """
        citation_marker = f"[{ref_id}]"

        # Parse BibTeX string
        bib_data = bibtex.Parser().parse_string(bibtex_string)

        entry_keys = list(bib_data.entries)
        if not entry_keys:
            # Nothing to format (e.g. an empty "bibtex" field): degrade to a
            # plain text reference rather than raising IndexError.
            fallback = bibtex_string.strip() or "Reference"
            return citation_marker, f"[{ref_id}]: {fallback}"

        # Only the first entry is used
        entry = bib_data.entries[entry_keys[0]]
        style = (style_name or "").lower()

        # Extract common fields
        authors = self.format_authors(entry, style_name)
        fields = entry.fields
        title = fields.get('title', '')
        journal = fields.get('journal', '')
        year = fields.get('year', '')
        volume = fields.get('volume', '')
        number = fields.get('number', '')
        pages = fields.get('pages', '')
        month = fields.get('month', '')
        doi = fields.get('doi', '')
        publisher = fields.get('publisher', '')
        address = fields.get('address', '')

        # "(Number)" suffix shared by several article layouts
        issue = f"({number})" if number else ""

        if entry.type == 'article':
            if style == "apa":
                # APA: Author. (Year). Title. Journal, Volume(Number), Pages.
                reference = f"{authors} ({year}). {title}. *{journal}*, {volume}{issue}, {pages}."

            elif style == "ieee":
                # IEEE: Author, "Title," Journal, vol. Volume, no. Number, pp. Pages, Month Year.
                pieces = [f"{authors}, \"{title},\" *{journal}*"]
                if volume:
                    pieces.append(f", vol. {volume}")
                if number:
                    pieces.append(f", no. {number}")
                pieces.append(f", pp. {pages}")
                if month:
                    pieces.append(f", {month}")
                pieces.append(f" {year}.")
                reference = "".join(pieces)

            elif style == "chicago":
                # Chicago: Author. "Title." Journal Volume, no. Number (Year): Pages.
                chicago_issue = f", no. {number}" if number else ""
                reference = f"{authors}. \"{title}.\" *{journal}* {volume}{chicago_issue} ({year}): {pages}."

            elif style == "nature":
                # Nature: Author. Title. Journal Volume, Pages (Year).
                reference = f"{authors}. {title}. *{journal}* {volume}, {pages} ({year})."

            elif style == "harvard1":
                # Harvard: Author (Year) 'Title', Journal, Volume(Number), pp. Pages.
                reference = f"{authors} ({year}) '{title}', *{journal}*, {volume}{issue}, pp. {pages}."

            elif style == "vancouver":
                # Vancouver: Author. Title. Journal. Year;Volume(Number):Pages.
                reference = f"{authors}. {title}. {journal}. {year};{volume}{issue}:{pages}."

            else:
                # Plain: Author. Title. Journal, Volume(Number), Pages, Year.
                reference = f"{authors}. {title}. *{journal}*, {volume}{issue}, {pages}, {year}."

        elif entry.type in ['book', 'incollection', 'inbook']:
            # "Publisher, Address"; the address is only shown with a publisher
            publisher_part = ""
            if publisher:
                publisher_part = f"{publisher}, {address}" if address else f"{publisher}"

            if style == "apa":
                # APA: Author. (Year). Title. Publisher, Address.
                reference = f"{authors} ({year}). *{title}*. {publisher_part}."

            elif style == "ieee":
                # IEEE: Author, Title. Address: Publisher, Year.
                reference = f"{authors}, *{title}*"
                place = ": ".join(part for part in (address, publisher) if part)
                if place:
                    reference += f". {place}"
                reference += f", {year}."

            elif style == "chicago":
                # Chicago: Author. Title. Address: Publisher, Year.
                reference = f"{authors}. *{title}*. "
                if address:
                    reference += f"{address}: "
                if publisher:
                    reference += f"{publisher}, "
                reference += f"{year}."

            else:
                # Default/Plain: Author. Title. Publisher, Address, Year.
                reference = f"{authors}. *{title}*. {publisher_part}, {year}."

        else:
            # Generic format for misc entry types
            reference = f"{authors} ({year}). {title}."

        # Add DOI with proper Markdown link formatting if available
        lowered = reference.lower()
        if doi and "doi:" not in lowered and "doi.org" not in lowered:
            reference += f" [doi:{doi}](https://doi.org/{doi})"

        # Format as a numbered reference
        return citation_marker, f"[{ref_id}]: {reference}"

    def format_authors(self, entry, style_name):
        """Format authors according to the style."""
        if 'author' not in entry.persons and 'editor' not in entry.persons:
            return ""
        
        # Use editors if no authors are available
        if 'author' in entry.persons:
            authors = entry.persons['author']
        else:
            authors = entry.persons['editor']
            
        # IEEE, Vancouver: Use last name first with initials
        if style_name.lower() in ["ieee", "vancouver"]:
            if len(authors) == 1:
                return self.format_last_first(authors[0], True)
            elif len(authors) <= 7:
                names = [self.format_last_first(author, True) for author in authors[:-1]]
                names.append(f"and {self.format_last_first(authors[-1], True)}")
                return ", ".join(names)
            else:
                # More than 7 authors: show first 6 + et al.
                names = [self.format_last_first(author, True) for author in authors[:6]]
                names.append("et al.")
                return ", ".join(names)
        
        # Harvard, APA: Normal order with full first names, et al. for >3
        elif style_name.lower() in ["harvard1", "apa"]:
            if len(authors) > 7:
                # For APA with more than 7 authors: First 6 + ... + last author
                names = [self.format_name(author) for author in authors[:6]]
                names.append("...")
                names.append(self.format_name(authors[-1]))
                return ", ".join(names)
            elif len(authors) > 3:
                return f"{self.format_name(authors[0])} et al."
            elif len(authors) == 1:
                return self.format_name(authors[0])
            else:
                names = [self.format_name(author) for author in authors[:-1]]
                names.append(f"and {self.format_name(authors[-1])}")
                return ", ".join(names)
        
        # Nature: All authors in normal order
        elif style_name.lower() == "nature":
            if len(authors) == 1:
                return self.format_name(authors[0])
            else:
                names = [self.format_name(author) for author in authors[:-1]]
                names.append(f"& {self.format_name(authors[-1])}")
                return ", ".join(names)
        
        # Chicago: All authors in normal order
        elif style_name.lower() == "chicago":
            if len(authors) == 1:
                return self.format_name(authors[0])
            else:
                names = [self.format_name(author) for author in authors[:-1]]
                names.append(f"and {self.format_name(authors[-1])}")
                return ", ".join(names)
        
        # Default: Normal order
        else:
            if len(authors) == 1:
                return self.format_name(authors[0])
            else:
                names = [self.format_name(author) for author in authors[:-1]]
                names.append(f"and {self.format_name(authors[-1])}")
                return ", ".join(names)

    def format_name(self, person):
        """Format a person's name in normal order: First Middle Last"""
        first = " ".join(person.first_names)
        last = " ".join(person.last_names)
        return f"{first} {last}"
        
    def format_last_first(self, person, initials=False):
        """Format a person's name as Last, First or Last, F."""
        if initials:
            first = " ".join([name[0] + "." for name in person.first_names])
        else:
            first = " ".join(person.first_names)
        
        last = " ".join(person.last_names)
        
        if first:
            return f"{last}, {first}"
        else:
            return last

    def process_web_reference(self, web_data, ref_id):
        """
        Convert a web reference to a formatted citation according to style.
        
        Args:
            web_data: Dictionary with web reference data (title, url, etc.)
            ref_id: Reference ID for citation
            
        Returns:
            tuple: (citation_marker, reference_entry)
        """
        citation_marker = f"[{ref_id}]"
        
        # Extract web data
        title = web_data.get("title", "Web Page")
        url = web_data.get("url", "")
        snippet = web_data.get("snippet", "")
        
        # Look for a publication date in the title or snippet
        # Some websites include dates like "Jan 2023" or "2022" in titles
        date = ""
        date_patterns = [
            r'\b(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+\d{1,2},?\s+\d{4}\b',  # Jan 1, 2023
            r'\b\d{1,2}\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+\d{4}\b',    # 1 Jan 2023
            r'\b(January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}\b',  # January 1, 2023
            r'\b\d{4}\b'  # Just a year like 2023
        ]
        
        combined_text = f"{title} {snippet}"
        for pattern in date_patterns:
            match = re.search(pattern, combined_text)
            if match:
                date = match.group(0)
                break
        
        # Format according to APA style for websites without forcing a date
        markdown_reference = f"[{ref_id}]: {title}. "
        
        # Add date only if we found one in the content
        if date:
            markdown_reference += f"({date}). "
        
        # Add URL
        markdown_reference += f"Retrieved from [{url}]({url})"
        
        return citation_marker, markdown_reference

    def process_file_reference(self, filepath, ref_id, description=None):
        """
        Convert a filepath to a markdown reference with clickable link.
        
        Args:
            filepath: Path to the file
            ref_id: Reference ID for citation
            description: Optional description for the file
            
        Returns:
            tuple: (citation_marker, reference_entry)
        """
        citation_marker = f"[{ref_id}]"
        
        # Create file basename for display
        filename = os.path.basename(filepath)
        
        # Create clickable URL in the specific format:
        # https://filecloud.vicebio.com/ui/core/index.html?filter=FILENAME#expl-tabl.%2FSHARED%2Fvicebio_shares%2FPATH
        
        # Escape spaces in filename with + for the first part
        encoded_filename = filename.replace(' ', '+')
        
        # Encode path for the second part (after #expl-tabl.)
        # Extract directory path without filename
        directory_path = os.path.dirname(filepath)
        # Ensure path ends with '/'
        if directory_path and not directory_path.endswith('/'):
            directory_path += '/'
        
        encoded_path = f"/SHARED/vicebio_shares/{directory_path}"
        encoded_path = encoded_path.replace(' ', '%20')
        
        # Construct the full URL
        file_url = f"https://filecloud.vicebio.com/ui/core/index.html?filter={encoded_filename}#expl-tabl.{encoded_path}"
        
        # Create the reference with description if provided and target="_blank" for new tab
        if description and description != "Unknown Document" and description != filename:
            # Use description as the main title, filename as the link text
            markdown_reference = f"[{ref_id}]: **{description}**: <a href='{file_url}' target='_blank'>{filename}</a>"
        else:
            # Use filename as both title and link text
            markdown_reference = f"[{ref_id}]: <a href='{file_url}' target='_blank'>{filename}</a>"
        
        return citation_marker, markdown_reference

    def process_file_reference_SP(self, filepath, ref_id, description=None):
        """
        Convert a filepath to a markdown reference with clickable link.
        
        Args:
            filepath: Path to the file
            ref_id: Reference ID for citation
            description: Optional description for the file
            
        Returns:
            tuple: (citation_marker, reference_entry)
        """
        citation_marker = f"[{ref_id}]"
        
        # Create file basename for display
        filename = os.path.basename(filepath)
        
        # Determine file type indicator for SharePoint
        file_ext = os.path.splitext(filename)[1].lower()
        file_type_indicator = 'x'  # Default to Excel
        if file_ext in ['.docx', '.doc']:
            file_type_indicator = 'w'
        elif file_ext in ['.pptx', '.ppt']:
            file_type_indicator = 'p'
        elif file_ext in ['.pdf']:
            file_type_indicator = 'b'
        elif file_ext in ['.txt', '.csv', '.json', '.md']:
            file_type_indicator = 't'

        # Extract directory path without filename
        directory_path = os.path.dirname(filepath)
        # Ensure path ends with '/'
        if directory_path and not directory_path.endswith('/'):
            directory_path += '/'
        
        # URL encode the paths and filenames
        encoded_directory = directory_path.replace(' ', '%20').replace('#', '%23').replace('&', '%26')
        encoded_filename = filename.replace(' ', '%20').replace('#', '%23').replace('&', '%26')

        # Clean the directory path to remove the first part before the first "/" if present
        path_parts = encoded_directory.split('/', 1)
        if len(path_parts) > 1:
            # Remove the first directory level
            cleaned_directory = path_parts[1]
        else:
            cleaned_directory = encoded_directory


        # 1. Create SharePoint web view URL - removing the first directory part
        web_url = f"https://ethernabvba.sharepoint.com/:{file_type_indicator}:/r/sites/DATA-T001/Shared%20Documents/{cleaned_directory}{encoded_filename}?d=w{uuid4().hex[:12]}&csf=1&web=1"

        # 2. Create SharePoint direct download URL - also with cleaned path
        download_url = f"https://ethernabvba.sharepoint.com/sites/DATA-T001/_layouts/15/download.aspx?SourceUrl=/sites/DATA-T001/Shared%20Documents/{cleaned_directory}{encoded_filename}"
        
        # Create the reference with both URL options
        if description:
            markdown_reference = f"[{ref_id}]: **{description}**: [{filename}]({web_url}) [(Download)]({download_url})"
        else:
            markdown_reference = f"[{ref_id}]: [{filename}]({web_url}) [(Download)]({download_url})"
        
        return citation_marker, markdown_reference

    def generate_references_section(self, references, style="apa"):
        """
        Generate a complete references section in Markdown from a list of reference objects.
        
        Args:
            references: List of reference objects (dictionaries with required fields)
            style: Citation style to use (e.g., 'apa', 'ieee', 'nature', etc.)
            
        Returns:
            tuple: (markdown_output, citation_map)
        """
        title = self.style_titles.get(style.lower(), "References")
        markdown_output = f"## {title}\n\n"
        citation_map = {}
        
        # Format each reference
        for ref in references:
            ref_id = ref["id"]
            ref_type = ref["type"]
            
            if ref_type == "bibtex":
                citation, reference = self.format_bibtex_reference(ref["content"], ref_id, style)
            elif ref_type == "file" or ref_type == "document":
                description = ref.get("description", None)
                citation, reference = self.process_file_reference(ref["path"], ref_id, description)
            elif ref_type == "web":
                citation, reference = self.process_web_reference(ref, ref_id)
            else:
                # Simple text reference
                citation_marker = f"[{ref_id}]"
                markdown_reference = f"[{ref_id}]: {ref.get('content', 'Reference')}"
                citation, reference = citation_marker, markdown_reference
                
            markdown_output += reference + "\n\n"
            citation_map[ref_id] = citation
        
        return markdown_output.strip(), citation_map

    def list_available_styles(self):
        """
        List all available citation styles
        
        Returns:
            list: Names of available styles
        """
        return ["apa", "ieee", "chicago", "harvard1", "nature", "vancouver", "plain"]

Parameters

Name	Type	Default	Kind
`bases`	-	-

Parameter Details

bases: Parameter of type

Return Value

Returns unspecified type

Class Interface

Methods

`__init__(self, default_style)`

Purpose: Initialize the reference manager. Args: default_style: Default citation style to use

Parameters:

default_style: Parameter

Returns: None

`extract_references(self, text)`

Purpose: Extract block references from text in various formats including: - Single blocks: [block 1] or [Block 1] - Multiple blocks: [Block 1, Block 2, Block 3] or [Block 1, 2, 3] Args: text: Text to extract references from Returns: list: List of block numbers referenced in the text

Parameters:

text: Parameter

Returns: See docstring for return details

`process_references(self, text, blocks_dict, style)`

Purpose: Process references in text, lookup block information, and generate reference section. An improved approach to handle complex reference patterns reliably including ranges. Args: text: Text containing references blocks_dict: Dictionary with block data indexed by block number style: Citation style to use (defaults to self.default_style) Returns: tuple: (updated_text, references_section)

Parameters:

text: Parameter
blocks_dict: Parameter
style: Parameter

Returns: See docstring for return details

`format_bibtex_reference(self, bibtex_string, ref_id, style_name)`

Purpose: Format a BibTeX entry. Args: bibtex_string: BibTeX entry as a string ref_id: Reference ID for citation style_name: Style name (apa, ieee, etc.) Returns: tuple: (citation_marker, reference_entry)

Parameters:

bibtex_string: Parameter
ref_id: Parameter
style_name: Parameter

Returns: See docstring for return details

`format_authors(self, entry, style_name)`

Purpose: Format authors according to the style.

Parameters:

entry: Parameter
style_name: Parameter

Returns: str — the formatted author list, or an empty string when the entry has neither authors nor editors

`format_name(self, person)`

Purpose: Format a person's name in normal order: First Middle Last

Parameters:

person: Parameter

Returns: str — the person's name in normal (first name, then last name) order

`format_last_first(self, person, initials)`

Purpose: Format a person's name as Last, First or Last, F.

Parameters:

person: Parameter
initials: Parameter

Returns: str — the person's name with the last name first, using initials when `initials` is true

`process_web_reference(self, web_data, ref_id)`

Purpose: Convert a web reference to a formatted citation according to style. Args: web_data: Dictionary with web reference data (title, url, etc.) ref_id: Reference ID for citation Returns: tuple: (citation_marker, reference_entry)

Parameters:

web_data: Parameter
ref_id: Parameter

Returns: See docstring for return details

`process_file_reference(self, filepath, ref_id, description)`

Purpose: Convert a filepath to a markdown reference with clickable link. Args: filepath: Path to the file ref_id: Reference ID for citation description: Optional description for the file Returns: tuple: (citation_marker, reference_entry)

Parameters:

filepath: Parameter
ref_id: Parameter
description: Parameter

Returns: See docstring for return details

`process_file_reference_SP(self, filepath, ref_id, description)`

Parameters:

filepath: Parameter
ref_id: Parameter
description: Parameter

Returns: See docstring for return details

`generate_references_section(self, references, style)`

Purpose: Generate a complete references section in Markdown from a list of reference objects. Args: references: List of reference objects (dictionaries with required fields) style: Citation style to use (e.g., 'apa', 'ieee', 'nature', etc.) Returns: tuple: (markdown_output, citation_map)

Parameters:

references: Parameter
style: Parameter

Returns: See docstring for return details

`list_available_styles(self)`

Purpose: List all available citation styles Returns: list: Names of available styles

Returns: See docstring for return details

Required Imports

from typing import List
from typing import Any
from typing import Dict
import os
import panel as pn

Usage Example

# Example usage:
# manager = ReferenceManager(default_style="apa")
# updated_text, references_section = manager.process_references(text, blocks_dict)

Similar Components

AI-powered semantic similarity - components with related functionality:

class ReferenceManager_v3 98.5% similar

Manages extraction and formatting of references for LLM chat responses. Handles both file references and BibTeX citations, formatting them according to various academic citation styles.
From: /tf/active/vicechatdev/OneCo_hybrid_RAG_old.py
class ReferenceManager_v2 98.5% similar

Manages extraction and formatting of references for LLM chat responses. Handles both file references and BibTeX citations, formatting them according to various academic citation styles.
From: /tf/active/vicechatdev/OneCo_hybrid_RAG copy.py
class ReferenceManager_v5 98.4% similar

Manages extraction and formatting of references for LLM chat responses. Handles both file references and BibTeX citations, formatting them according to various academic citation styles.
From: /tf/active/vicechatdev/datacapture_backup_16072025/OneCo_hybrid_RAG.py
class ReferenceManager 74.4% similar

Manages document references for inline citation and bibliography generation in a RAG (Retrieval-Augmented Generation) system.
From: /tf/active/vicechatdev/fixed_project_victoria_generator.py
class ReferenceManager_v1 73.2% similar

Manages document references for inline citation and bibliography generation, tracking documents and generating formatted citations and bibliographies.
From: /tf/active/vicechatdev/improved_project_victoria_generator.py

← Back to Browse

Assistant

Hi! I can help improve this code. Tell me what you'd like to enhance (e.g., "add error handling", "optimize performance", "improve readability", "add type hints").

Code Comparison

Original Code

                            class ReferenceManager:
    """
    Manages extraction and formatting of references for LLM chat responses.
    Handles both file references and BibTeX citations, formatting them according
    to various academic citation styles.
    """
    
    def __init__(self, default_style="apa"):
        """
        Initialize the reference manager.
        
        Args:
            default_style: Default citation style to use
        """
        self.default_style = default_style
        self.style_titles = {
            "apa": "References",
            "mla": "Works Cited",
            "chicago": "Bibliography",
            "ieee": "References",
            "harvard1": "References",
            "vancouver": "References",
            "nature": "References"
        }

    
    
    def extract_references(self, text):
        """
        Collect the block numbers cited in text.

        Three bracketed notations are recognised:
        - a single block: [block 1] or [Block 1]
        - a list of fully spelled blocks: [Block 1, Block 2, Block 3]
        - an abbreviated list: [Block 1, 2, 3]

        The text is scanned once per notation, in the order listed above, so
        the result is grouped by notation rather than by position in the text.

        Args:
            text: Text to extract references from

        Returns:
            list: Distinct block numbers (ints) in order of first discovery
        """
        single_re = r'\[(?:[Bb]lock)\s+(\d+)\]'
        spelled_list_re = r'\[(?:[Bb]lock\s+\d+(?:,\s*[Bb]lock\s+\d+)+)\]'
        short_list_re = r'\[(?:[Bb]lock\s+\d+(?:,\s*\d+)+)\]'

        # A dict keeps insertion order and ignores repeated keys, which gives
        # "unique, in order of first discovery" without scanning a list.
        found = {}

        # Pass 1: [Block N]
        for digits in re.findall(single_re, text):
            found.setdefault(int(digits))

        # Pass 2: [Block N, Block M, ...]
        for bracketed in re.findall(spelled_list_re, text):
            inner = bracketed[1:-1]  # drop the surrounding brackets
            for digits in re.findall(r'[Bb]lock\s+(\d+)', inner):
                found.setdefault(int(digits))

        # Pass 3: [Block N, M, ...]
        for bracketed in re.findall(short_list_re, text):
            inner = bracketed[1:-1]  # drop the surrounding brackets
            leading = re.search(r'[Bb]lock\s+(\d+)', inner)
            if leading is None:
                continue
            # The number carrying the "Block" prefix comes first ...
            found.setdefault(int(leading.group(1)))
            # ... followed by every bare number after a comma.
            for digits in re.findall(r',\s*(\d+)', inner):
                found.setdefault(int(digits))

        return list(found)
    
    def process_references(self, text, blocks_dict, style=None):
        """
        Process references in text, lookup block information, and generate reference section.
        An improved approach to handle complex reference patterns reliably including ranges.
        
        Args:
            text: Text containing references
            blocks_dict: Dictionary with block data indexed by block number
            style: Citation style to use (defaults to self.default_style)
            
        Returns:
            tuple: (updated_text, references_section)
        """
        # Use default style if none specified
        if style is None:
            style = self.default_style
        
        # STEP 1: Find all block references in the text with enhanced pattern matching
        # This pattern also captures ranges with hyphens (e.g., [Blocks 2-11])
        all_refs_pattern = r'\[((?:[Bb]locks?\s+\d+(?:\s*[,-]\s*(?:[Bb]locks?\s*)?\d+)*)|(?:[Bb]locks?\s+\d+))\]'
        ref_matches = re.findall(all_refs_pattern, text)
        print(f"DEBUG: Found reference matches: {ref_matches}")
        
        # Create a mapping of original reference text to its position
        original_refs = {}
        for match in re.finditer(all_refs_pattern, text):
            original_refs[match.group(0)] = match.span()
        
        print(f"DEBUG: Original refs dict: {list(original_refs.keys())}")
        print(f"DEBUG: Available blocks in dict: {list(blocks_dict.keys())}")
        
        # STEP 2: Extract block numbers from each reference
        reference_catalog = {}
        for ref_text in ref_matches:
            # Handle ranges with hyphens (e.g., "Blocks 2-11")
            expanded_numbers = []
            
            # First check for ranges with hyphens
            range_matches = re.findall(r'(\d+)\s*-\s*(\d+)', ref_text)
            for start, end in range_matches:
                # Convert to integers and expand the range
                start_num, end_num = int(start), int(end)
                expanded_numbers.extend(range(start_num, end_num + 1))
            
            # Then add any individual numbers not in ranges
            # Replace ranges first to avoid counting them twice
            cleaned_text = re.sub(r'\d+\s*-\s*\d+', '', ref_text)
            individual_numbers = [int(num) for num in re.findall(r'\d+', cleaned_text)]
            expanded_numbers.extend(individual_numbers)
            
            # Remove duplicates and store in catalog
            block_numbers = sorted(list(set(expanded_numbers)))
            reference_catalog[f"[{ref_text}]"] = block_numbers
        
        # Collect all unique block numbers across all references
        all_block_numbers = []
        for numbers in reference_catalog.values():
            for num in numbers:
                if num not in all_block_numbers:
                    all_block_numbers.append(num)
        
        # STEP 3: Create references for all found block numbers
        references = []
        citation_map = {}
        
        for i, block_num in enumerate(all_block_numbers, 1):
            if block_num not in blocks_dict:
                print(f"Warning: Block {block_num} referenced but not found in blocks_dict")
                continue
                
            block_data = blocks_dict[block_num]
            block_type = block_data.get("type", "unknown")
            ref_id = str(i)
            
            # Create reference object based on block type
            if block_type == "document":
                # Document/file reference
                ref_obj = {
                    "id": ref_id,
                    "type": "file",
                    "path": block_data.get("path", ""),
                    "description": block_data.get("description", None)
                }
            elif block_type == "literature":
                # BibTeX reference
                ref_obj = {
                    "id": ref_id,
                    "type": "bibtex",
                    "content": block_data.get("bibtex", "")
                }
            elif block_type == "web":
                # Web reference
                ref_obj = {
                    "id": ref_id,
                    "type": "web",
                    "url": block_data.get("url", ""),
                    "title": block_data.get("title", "Web Page"),
                    "snippet": block_data.get("snippet", ""),
                    "date": block_data.get("date", "")
                }
            else:
                # Generic reference (fallback)
                ref_obj = {
                    "id": ref_id,
                    "type": "generic",
                    "content": block_data.get("content", "")
                }
                
            references.append(ref_obj)
            citation_map[block_num] = ref_id
        
        # Generate reference section
        print("citation map", citation_map)
        references_section, _ = self.generate_references_section(references, style)
        
        # STEP 4: Replace all original references with their numerical equivalents
        updated_text = text
        
        # Debug print to see what we're working with
        print(f"Original refs to process: {len(original_refs)}")
        
        # Sort references by length (longest first) to avoid partial replacements
        sorted_refs = sorted(original_refs.keys(), key=len, reverse=True)
        
        for original_ref in sorted_refs:
            # Get block numbers for this reference
            block_numbers = reference_catalog.get(original_ref, [])
            
            # Skip if no block numbers found (shouldn't happen with our regex)
            if not block_numbers:
                continue
                
            # Create a list of citation IDs for the block numbers
            ref_ids = []
            for block_num in block_numbers:
                if block_num in citation_map:
                    ref_ids.append(citation_map[block_num])
            
            # Only proceed if we have valid reference IDs
            if ref_ids:
                # Sort the reference IDs numerically before joining them
                # This ensures they appear in increasing order: [1, 2, 3] instead of [3, 1, 2]
                sorted_ref_ids = sorted(ref_ids, key=int)
                
                # Create the new reference string with comma-delimited ref_ids
                # Always using comma format even if original was a range
                new_ref = f"[{', '.join(sorted_ref_ids)}]"
                
                # Replace the entire original reference with the new formatted one
                updated_text = updated_text.replace(original_ref, new_ref)
                
                # Debug prints to see what's happening
                print(f"Replacing: {original_ref} -> {new_ref}")
        
        return updated_text, references_section
    
    def format_bibtex_reference(self, bibtex_string, ref_id, style_name="apa"):
        """
        Render the first entry of a BibTeX string as a numbered Markdown reference.

        Articles and books (book / incollection / inbook) get a style-specific
        layout; every other entry type falls back to "Authors (Year). Title.".
        A DOI link is appended when the entry has a doi field and the rendered
        text does not already mention one.

        Args:
            bibtex_string: BibTeX entry as a string
            ref_id: Reference ID for citation
            style_name: Style name (apa, ieee, etc.)

        Returns:
            tuple: (citation_marker, reference_entry)
        """
        bib_data = bibtex.Parser().parse_string(bibtex_string)

        # Only the first entry of the parsed database is rendered.
        first_key = list(bib_data.entries)[0]
        entry = bib_data.entries[first_key]

        citation_marker = f"[{ref_id}]"

        authors = self.format_authors(entry, style_name)
        field = entry.fields.get
        title = field('title', '')
        journal = field('journal', '')
        year = field('year', '')
        volume = field('volume', '')
        number = field('number', '')
        pages = field('pages', '')
        month = field('month', '')
        doi = field('doi', '')
        publisher = field('publisher', '')
        address = field('address', '')

        if entry.type == 'article':
            style = style_name.lower()
            # "(number)" glued directly onto the volume, when there is a number
            issue = f"({number})" if number else ""

            if style == "apa":
                # APA: Author. (Year). Title. Journal, Volume(Number), Pages.
                reference = f"{authors} ({year}). {title}. *{journal}*, {volume}{issue}, {pages}."
            elif style == "ieee":
                # IEEE: Author, "Title," Journal, vol. Volume, no. Number, pp. Pages, Month Year.
                pieces = [f'{authors}, "{title}," *{journal}*']
                if volume:
                    pieces.append(f", vol. {volume}")
                if number:
                    pieces.append(f", no. {number}")
                pieces.append(f", pp. {pages}")
                if month:
                    pieces.append(f", {month}")
                pieces.append(f" {year}.")
                reference = "".join(pieces)
            elif style == "chicago":
                # Chicago: Author. "Title." Journal Volume, no. Number (Year): Pages.
                numbered = f", no. {number}" if number else ""
                reference = f'{authors}. "{title}." *{journal}* {volume}{numbered} ({year}): {pages}.'
            elif style == "nature":
                # Nature: Author. Title. Journal Volume, Pages (Year).
                reference = f"{authors}. {title}. *{journal}* {volume}, {pages} ({year})."
            elif style == "harvard1":
                # Harvard: Author (Year) 'Title', Journal, Volume(Number), pp. Pages.
                reference = f"{authors} ({year}) '{title}', *{journal}*, {volume}{issue}, pp. {pages}."
            elif style == "vancouver":
                # Vancouver: Author. Title. Journal. Year;Volume(Number):Pages.
                reference = f"{authors}. {title}. {journal}. {year};{volume}{issue}:{pages}."
            else:
                # Plain: Author. Title. Journal, Volume(Number), Pages, Year.
                reference = f"{authors}. {title}. *{journal}*, {volume}{issue}, {pages}, {year}."

        elif entry.type in ['book', 'incollection', 'inbook']:
            style = style_name.lower()
            # "Publisher" or "Publisher, Address"; the address is only shown
            # together with a publisher (used by the APA and plain layouts)
            imprint = ""
            if publisher:
                imprint = f"{publisher}"
                if address:
                    imprint += f", {address}"

            if style == "apa":
                # APA: Author. (Year). Title. Publisher, Address.
                reference = f"{authors} ({year}). *{title}*. {imprint}."
            elif style == "ieee":
                # IEEE: Author, Title. Address: Publisher, Year.
                pieces = [f"{authors}, *{title}*"]
                if address or publisher:
                    pieces.append(". ")
                    if address:
                        pieces.append(f"{address}")
                        if publisher:
                            pieces.append(": ")
                    if publisher:
                        pieces.append(f"{publisher}")
                pieces.append(f", {year}.")
                reference = "".join(pieces)
            elif style == "chicago":
                # Chicago: Author. Title. Address: Publisher, Year.
                place = f"{address}: " if address else ""
                house = f"{publisher}, " if publisher else ""
                reference = f"{authors}. *{title}*. {place}{house}{year}."
            else:
                # Default/Plain: Author. Title. Publisher, Address, Year.
                reference = f"{authors}. *{title}*. {imprint}, {year}."

        else:
            # Any other entry type: generic layout
            reference = f"{authors} ({year}). {title}."

        # Append a Markdown DOI link unless the text already carries one
        if doi:
            lowered = reference.lower()
            if "doi:" not in lowered and "doi.org" not in lowered:
                reference += f" [doi:{doi}](https://doi.org/{doi})"

        return citation_marker, f"[{ref_id}]: {reference}"

    def format_authors(self, entry, style_name):
        """Format authors according to the style."""
        if 'author' not in entry.persons and 'editor' not in entry.persons:
            return ""
        
        # Use editors if no authors are available
        if 'author' in entry.persons:
            authors = entry.persons['author']
        else:
            authors = entry.persons['editor']
            
        # IEEE, Vancouver: Use last name first with initials
        if style_name.lower() in ["ieee", "vancouver"]:
            if len(authors) == 1:
                return self.format_last_first(authors[0], True)
            elif len(authors) <= 7:
                names = [self.format_last_first(author, True) for author in authors[:-1]]
                names.append(f"and {self.format_last_first(authors[-1], True)}")
                return ", ".join(names)
            else:
                # More than 7 authors: show first 6 + et al.
                names = [self.format_last_first(author, True) for author in authors[:6]]
                names.append("et al.")
                return ", ".join(names)
        
        # Harvard, APA: Normal order with full first names, et al. for >3
        elif style_name.lower() in ["harvard1", "apa"]:
            if len(authors) > 7:
                # For APA with more than 7 authors: First 6 + ... + last author
                names = [self.format_name(author) for author in authors[:6]]
                names.append("...")
                names.append(self.format_name(authors[-1]))
                return ", ".join(names)
            elif len(authors) > 3:
                return f"{self.format_name(authors[0])} et al."
            elif len(authors) == 1:
                return self.format_name(authors[0])
            else:
                names = [self.format_name(author) for author in authors[:-1]]
                names.append(f"and {self.format_name(authors[-1])}")
                return ", ".join(names)
        
        # Nature: All authors in normal order
        elif style_name.lower() == "nature":
            if len(authors) == 1:
                return self.format_name(authors[0])
            else:
                names = [self.format_name(author) for author in authors[:-1]]
                names.append(f"& {self.format_name(authors[-1])}")
                return ", ".join(names)
        
        # Chicago: All authors in normal order
        elif style_name.lower() == "chicago":
            if len(authors) == 1:
                return self.format_name(authors[0])
            else:
                names = [self.format_name(author) for author in authors[:-1]]
                names.append(f"and {self.format_name(authors[-1])}")
                return ", ".join(names)
        
        # Default: Normal order
        else:
            if len(authors) == 1:
                return self.format_name(authors[0])
            else:
                names = [self.format_name(author) for author in authors[:-1]]
                names.append(f"and {self.format_name(authors[-1])}")
                return ", ".join(names)

    def format_name(self, person):
        """
        Format a person's name in normal order: first names, then last names.

        Only person.first_names and person.last_names are used. When one of
        the two is empty (for example an organisation stored as a last name
        only) the other part is returned on its own, without a stray leading
        or trailing space.

        Args:
            person: Object exposing first_names and last_names as lists of strings

        Returns:
            str: The formatted name ("" when both parts are empty)
        """
        first = " ".join(person.first_names)
        last = " ".join(person.last_names)
        # Join only the non-empty parts so a missing part adds no blank
        return " ".join(part for part in (first, last) if part)
        
    def format_last_first(self, person, initials=False):
        """
        Format a person's name with the last name leading.

        Args:
            person: Object exposing first_names and last_names as lists of strings
            initials: When true, each first name is cut down to its first
                character followed by a period

        Returns:
            str: "Last, First" / "Last, F.", or just the last name when there
            are no first names
        """
        given = person.first_names
        if initials:
            given = [name[0] + "." for name in given]
        first = " ".join(given)
        last = " ".join(person.last_names)

        # Without first names there is nothing to put after the comma
        return f"{last}, {first}" if first else last

    def process_web_reference(self, web_data, ref_id):
        """
        Convert a web reference to a formatted Markdown citation.

        The publication date is taken from web_data["date"] when that field is
        present and non-empty. Otherwise the title and snippet are searched
        for a date, trying the most specific notation first ("Jan 1, 2023",
        "1 Jan 2023", "January 1, 2023") and finally a bare year. The bare
        year must lie in 1900-2099 so that arbitrary four-digit numbers
        (e.g. "Top 5000 tips") are not reported as dates. When no date is
        found the date part is simply omitted.

        Args:
            web_data: Dictionary with web reference data (title, url, snippet
                and optionally date)
            ref_id: Reference ID for citation

        Returns:
            tuple: (citation_marker, reference_entry)
        """
        citation_marker = f"[{ref_id}]"

        # Extract web data
        title = web_data.get("title", "Web Page")
        url = web_data.get("url", "")
        snippet = web_data.get("snippet", "")

        # An explicitly supplied date wins over anything guessed from the text
        date = str(web_data.get("date") or "").strip()

        if not date:
            date_patterns = [
                r'\b(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+\d{1,2},?\s+\d{4}\b',  # Jan 1, 2023
                r'\b\d{1,2}\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+\d{4}\b',    # 1 Jan 2023
                r'\b(January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}\b',  # January 1, 2023
                r'\b(?:19|20)\d{2}\b'  # Just a plausible year like 2023
            ]

            combined_text = f"{title} {snippet}"
            # Patterns are ordered from most to least specific; first hit wins
            for pattern in date_patterns:
                match = re.search(pattern, combined_text)
                if match:
                    date = match.group(0)
                    break

        # APA-like layout for websites, without forcing a date
        markdown_reference = f"[{ref_id}]: {title}. "

        if date:
            markdown_reference += f"({date}). "

        # Add URL
        markdown_reference += f"Retrieved from [{url}]({url})"

        return citation_marker, markdown_reference

    def process_file_reference(self, filepath, ref_id, description=None):
        """
        Turn a file path into a Markdown reference whose link opens the file's
        folder in the FileCloud web UI in a new tab.

        Args:
            filepath: Path to the file
            ref_id: Reference ID for citation
            description: Optional description for the file

        Returns:
            tuple: (citation_marker, reference_entry)
        """
        # Split once into the folder part and the display name
        directory_path, filename = os.path.split(filepath)

        # URL layout:
        # https://filecloud.vicebio.com/ui/core/index.html?filter=FILENAME#expl-tabl.%2FSHARED%2Fvicebio_shares%2FPATH
        # The filter part wants '+' for spaces, the fragment part '%20'.
        encoded_filename = filename.replace(' ', '+')

        # A non-empty folder path must end with a slash
        if directory_path and not directory_path.endswith('/'):
            directory_path = directory_path + '/'
        encoded_path = f"/SHARED/vicebio_shares/{directory_path}".replace(' ', '%20')

        file_url = f"https://filecloud.vicebio.com/ui/core/index.html?filter={encoded_filename}#expl-tabl.{encoded_path}"

        # target="_blank" opens the link in a new tab
        anchor = f"<a href='{file_url}' target='_blank'>{filename}</a>"

        # A description is only shown when it says more than the file name
        # and is not the "Unknown Document" placeholder
        informative = bool(description) and description not in ("Unknown Document", filename)
        if informative:
            markdown_reference = f"[{ref_id}]: **{description}**: {anchor}"
        else:
            markdown_reference = f"[{ref_id}]: {anchor}"

        return f"[{ref_id}]", markdown_reference

    def process_file_reference_SP(self, filepath, ref_id, description=None):
        """
        Turn a file path into a Markdown reference carrying two SharePoint
        links: one for the web view and one for a direct download.

        The first directory level of filepath is dropped when the SharePoint
        paths are built.

        Args:
            filepath: Path to the file
            ref_id: Reference ID for citation
            description: Optional description for the file

        Returns:
            tuple: (citation_marker, reference_entry)
        """
        directory_path, filename = os.path.split(filepath)

        # Letter SharePoint uses in the ":x:" URL segment, chosen by file
        # extension; anything not listed is treated like Excel ('x')
        indicator_by_extension = {
            '.docx': 'w', '.doc': 'w',
            '.pptx': 'p', '.ppt': 'p',
            '.pdf': 'b',
            '.txt': 't', '.csv': 't', '.json': 't', '.md': 't',
        }
        extension = os.path.splitext(filename)[1].lower()
        file_type_indicator = indicator_by_extension.get(extension, 'x')

        # A non-empty folder path must end with a slash
        if directory_path and not directory_path.endswith('/'):
            directory_path = directory_path + '/'

        # Percent-escape the characters that would break the URL
        escapes = str.maketrans({' ': '%20', '#': '%23', '&': '%26'})
        encoded_directory = directory_path.translate(escapes)
        encoded_filename = filename.translate(escapes)

        # Drop everything up to and including the first '/', i.e. the first
        # directory level; a path without any '/' is used unchanged
        _, slash, remainder = encoded_directory.partition('/')
        cleaned_directory = remainder if slash else encoded_directory

        # 1. SharePoint web view URL (the "d=w..." value is 12 random hex digits)
        web_url = f"https://ethernabvba.sharepoint.com/:{file_type_indicator}:/r/sites/DATA-T001/Shared%20Documents/{cleaned_directory}{encoded_filename}?d=w{uuid4().hex[:12]}&csf=1&web=1"

        # 2. SharePoint direct download URL, built from the same cleaned path
        download_url = f"https://ethernabvba.sharepoint.com/sites/DATA-T001/_layouts/15/download.aspx?SourceUrl=/sites/DATA-T001/Shared%20Documents/{cleaned_directory}{encoded_filename}"

        # Optional bold description in front of the two links
        lead = f"**{description}**: " if description else ""
        markdown_reference = f"[{ref_id}]: {lead}[{filename}]({web_url}) [(Download)]({download_url})"

        return f"[{ref_id}]", markdown_reference

    def generate_references_section(self, references, style="apa"):
        """
        Generate a complete references section in Markdown from a list of reference objects.
        
        Args:
            references: List of reference objects (dictionaries with required fields)
            style: Citation style to use (e.g., 'apa', 'ieee', 'nature', etc.)
            
        Returns:
            tuple: (markdown_output, citation_map)
        """
        title = self.style_titles.get(style.lower(), "References")
        markdown_output = f"## {title}\n\n"
        citation_map = {}
        
        # Format each reference
        for ref in references:
            ref_id = ref["id"]
            ref_type = ref["type"]
            
            if ref_type == "bibtex":
                citation, reference = self.format_bibtex_reference(ref["content"], ref_id, style)
            elif ref_type == "file" or ref_type == "document":
                description = ref.get("description", None)
                citation, reference = self.process_file_reference(ref["path"], ref_id, description)
            elif ref_type == "web":
                citation, reference = self.process_web_reference(ref, ref_id)
            else:
                # Simple text reference
                citation_marker = f"[{ref_id}]"
                markdown_reference = f"[{ref_id}]: {ref.get('content', 'Reference')}"
                citation, reference = citation_marker, markdown_reference
                
            markdown_output += reference + "\n\n"
            citation_map[ref_id] = citation
        
        return markdown_output.strip(), citation_map

    def list_available_styles(self):
        """
        Report the citation style names this manager offers.

        Returns:
            list: Names of available styles (a new list on every call)
        """
        supported = ("apa", "ieee", "chicago", "harvard1", "nature", "vancouver", "plain")
        return list(supported)
                        

Improved Code

🔍 Code Extractor

class ReferenceManager_v4

Purpose

Source Code

Parameters

Parameter Details

Return Value

Class Interface

Methods

`__init__(self, default_style)`

`extract_references(self, text)`

`process_references(self, text, blocks_dict, style)`

`format_bibtex_reference(self, bibtex_string, ref_id, style_name)`

`format_authors(self, entry, style_name)`

`format_name(self, person)`

`format_last_first(self, person, initials)`

`process_web_reference(self, web_data, ref_id)`

`process_file_reference(self, filepath, ref_id, description)`

`process_file_reference_SP(self, filepath, ref_id, description)`

`generate_references_section(self, references, style)`

`list_available_styles(self)`

Required Imports

Usage Example

Tags

Similar Components

class ReferenceManager_v3 98.5% similar

class ReferenceManager_v2 98.5% similar

class ReferenceManager_v5 98.4% similar

class ReferenceManager 74.4% similar

class ReferenceManager_v1 73.2% similar

class ReferenceManager_v4

Purpose

Source Code

Parameters

Parameter Details

Return Value

Class Interface

Methods

__init__(self, default_style)

extract_references(self, text)

process_references(self, text, blocks_dict, style)

format_bibtex_reference(self, bibtex_string, ref_id, style_name)

format_authors(self, entry, style_name)

format_name(self, person)

format_last_first(self, person, initials)

process_web_reference(self, web_data, ref_id)

process_file_reference(self, filepath, ref_id, description)

process_file_reference_SP(self, filepath, ref_id, description)

generate_references_section(self, references, style)

list_available_styles(self)

Required Imports

Usage Example

Tags

Similar Components

class ReferenceManager_v3 98.5% similar

class ReferenceManager_v2 98.5% similar

class ReferenceManager_v5 98.4% similar

class ReferenceManager 74.4% similar

class ReferenceManager_v1 73.2% similar

✨ Improve Code: ReferenceManager_v4

Code Comparison

`__init__(self, default_style)`

`extract_references(self, text)`

`process_references(self, text, blocks_dict, style)`

`format_bibtex_reference(self, bibtex_string, ref_id, style_name)`

`format_authors(self, entry, style_name)`

`format_name(self, person)`

`format_last_first(self, person, initials)`

`process_web_reference(self, web_data, ref_id)`

`process_file_reference(self, filepath, ref_id, description)`

`process_file_reference_SP(self, filepath, ref_id, description)`

`generate_references_section(self, references, style)`

`list_available_styles(self)`