🔍 Code Extractor

class ExtensiveSearchManager_v1

Maturity: 27

File: /tf/active/vicechatdev/vice_ai/hybrid_rag_engine.py
Lines: 4887–6205
Complexity: moderate

Purpose

Manages extensive search functionality including full document retrieval, summarization, and enhanced context gathering.
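
A minimal usage sketch follows; the Neo4j URI, credentials, collection name, and chunk metadata below are illustrative placeholders rather than values from this file, and only the ExtensiveSearchManager calls mirror the extract:

from neo4j import GraphDatabase
import chromadb

# Illustrative wiring; substitute real connection details and API key.
driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
chroma_client = chromadb.Client()

with driver.session() as session:
    manager = ExtensiveSearchManager(session, chroma_client, api_key="sk-...")

    # Reconstruct the full document behind a retrieved chunk...
    chunk_metadata = {"bibtex": "papers/example_paper.pdf"}  # placeholder metadata
    full_doc = manager.get_full_document(chunk_metadata, "my_collection")

    # ...then produce a query-focused summary of it.
    if full_doc:
        summary = manager.summarize_document(
            full_doc,
            query_context="What catalysts were tested?",
            max_tokens=1500,
        )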

Source Code
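
The class body references names this extract does not define; they are presumably imported at the top of hybrid_rag_engine.py. A sketch of the module-level imports inferred from usage in the class (an assumption, not the file's actual import list):

import tiktoken                            # tokenizer setup in __init__
import numpy as np                         # embedding math in remove_duplicate_documents
from langchain_openai import ChatOpenAI    # summarizer construction in __init__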

class ExtensiveSearchManager:
    """
    Manages extensive search functionality including full document retrieval,
    summarization, and enhanced context gathering.
    """
    
    def __init__(self, session, chroma_client, api_key="", rag_instance=None):
        self.session = session
        self.chroma_client = chroma_client
        self.api_key = api_key
        self.rag_instance = rag_instance  # Reference to main RAG instance for usage tracking
        self.summarizer = ChatOpenAI(
            model="gpt-4o-mini",
            temperature=0,
            max_tokens=2000,
            api_key=api_key
        )
        
        # Set up tokenizer for counting
        self.tokenizer = tiktoken.get_encoding("cl100k_base")
        
        # Initialize caches
        self.document_cache = {}
        self.summary_cache = {}
    
    def count_tokens(self, text):
        """Count tokens in text"""
        return len(self.tokenizer.encode(text))
    
    def get_full_document(self, chunk_metadata, collection_name):
        """
        Retrieve the full document that a chunk belongs to by using Neo4j to find all chunks
        from the same document and reconstruct the complete document.
        
        Args:
            chunk_metadata: Metadata from the chunk containing bibtex path or document info
            collection_name: Name of the ChromaDB collection  
            
        Returns:
            str: Full document text ordered by chunk sequence or None if not found
        """
        # Extract bibtex path or try to find document identifier
        bibtex_path = chunk_metadata.get('bibtex', '')
        if not bibtex_path:
            return None
            
        # Create cache key
        cache_key = f"{collection_name}_{bibtex_path}"
        if cache_key in self.document_cache:
            return self.document_cache[cache_key]
        
        try:
            # The bibtex path in metadata should correspond to the document path
            # First, try to find the document directly by path
            query = """
            MATCH (d:Document)
            WHERE d.Path = $bibtex_path OR d.Name CONTAINS $doc_name OR toString(d.UID) = $bibtex_path
            MATCH (d)-[:CHUNK]->(c)
            WHERE c:Text_chunk OR c:Table_chunk
            RETURN c.Text AS content, c.UID AS chunk_uid, c.Name AS chunk_name, d.UID AS doc_uid, d.Name AS doc_name
            ORDER BY c.Name
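            // assumes chunk Names sort in reading order, so concatenation reconstructs the document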
            """
            
            # Extract potential document name from bibtex path
            doc_name = bibtex_path.split('/')[-1] if '/' in bibtex_path else bibtex_path
            
            results = self.session.run(query, {
                "bibtex_path": bibtex_path, 
                "doc_name": doc_name
            })
            
            chunks = []
            doc_info = None
            
            for record in results:
                if record["content"]:
                    chunks.append(record["content"])
                    if not doc_info:
                        doc_info = {
                            "doc_uid": record["doc_uid"],
                            "doc_name": record["doc_name"]
                        }
            
            # If direct path matching didn't work, try fuzzy matching on document content paths
            if not chunks:
                # Extract filename without extension for broader matching
                if '.' in doc_name:
                    base_name = doc_name.rsplit('.', 1)[0]
                else:
                    base_name = doc_name
                    
                fuzzy_query = """
                MATCH (d:Document)-[:CHUNK]->(c)
                WHERE (c:Text_chunk OR c:Table_chunk) 
                AND (d.Name CONTAINS $base_name OR d.Path CONTAINS $base_name 
                     OR any(path_part IN split(d.Path, '/') WHERE path_part CONTAINS $base_name))
                RETURN c.Text AS content, c.UID AS chunk_uid, c.Name AS chunk_name, 
                       d.UID AS doc_uid, d.Name AS doc_name, d.Path AS doc_path
                ORDER BY d.UID, c.Name
                LIMIT 100
                """
                
                results = self.session.run(fuzzy_query, {"base_name": base_name})
                
                # Group by document to find the best match
                doc_candidates = {}
                for record in results:
                    doc_uid = record["doc_uid"]
                    if doc_uid not in doc_candidates:
                        doc_candidates[doc_uid] = {
                            "chunks": [],
                            "doc_name": record["doc_name"],
                            "doc_path": record["doc_path"]
                        }
                    if record["content"]:
                        doc_candidates[doc_uid]["chunks"].append(record["content"])
                
                # Pick the document with the most chunks (likely the most complete)
                if doc_candidates:
                    best_doc = max(doc_candidates.items(), key=lambda x: len(x[1]["chunks"]))
                    chunks = best_doc[1]["chunks"]
                    doc_info = {
                        "doc_uid": best_doc[0],
                        "doc_name": best_doc[1]["doc_name"],
                        "doc_path": best_doc[1]["doc_path"]
                    }
                    print(f"         📄 Fuzzy matched document: {doc_info['doc_name']} with {len(chunks)} chunks")
            
            if chunks:
                # Concatenate all chunks to reconstruct the full document
                full_document = "\n\n".join(chunks)
                self.document_cache[cache_key] = full_document
                print(f"         ✅ Full document retrieved: {len(chunks)} chunks, {len(full_document)} chars")
                if doc_info:
                    print(f"         📄 Document: {doc_info['doc_name']} (UID: {doc_info['doc_uid']})")
                return full_document
            else:
                print(f"         ⚠️  No chunks found for document path: {bibtex_path}")
                
        except Exception as e:
            print(f"         ❌ Error retrieving full document from Neo4j: {e}")
        
        return None
    
    def get_full_document_neo4j(self, chunk_uid):
        """
        Retrieve the full document from Neo4j that a chunk belongs to.
        Updated to match the actual Neo4j schema from offline_docstore_multi_vice.py
        
        Args:
            chunk_uid: UID of the text/table chunk
            
        Returns:
            str: Full document text ordered by chunk sequence or None if not found
        """
        cache_key = f"neo4j_{chunk_uid}"
        if cache_key in self.document_cache:
            return self.document_cache[cache_key]
        
        try:
            # Query based on the actual schema: Document-[:CHUNK]->Text_chunk/Table_chunk
            query = """
            MATCH (chunk {UID: $chunk_uid})<-[:CHUNK]-(doc:Document)
            MATCH (doc)-[:CHUNK]->(all_chunks)
            WHERE all_chunks:Text_chunk OR all_chunks:Table_chunk
            RETURN all_chunks.Text AS content, all_chunks.UID AS chunk_uid, 
                   all_chunks.Name AS chunk_name, doc.UID AS doc_uid, doc.Name AS doc_name
            ORDER BY all_chunks.Name
            """
            
            results = self.session.run(query, {"chunk_uid": chunk_uid})
            chunks = []
            doc_info = None
            
            for record in results:
                if record["content"]:
                    chunks.append(record["content"])
                    if not doc_info:
                        doc_info = {
                            "doc_uid": record["doc_uid"],
                            "doc_name": record["doc_name"]
                        }
            
            if chunks:
                # Concatenate all chunks to reconstruct the full document
                full_document = "\n\n".join(chunks)
                self.document_cache[cache_key] = full_document
                print(f"✅ Neo4j full document retrieved: {len(chunks)} chunks, {len(full_document)} chars from {doc_info['doc_name']}")
                return full_document
            else:
                print(f"⚠️  No Neo4j document found for chunk UID: {chunk_uid}")
                
        except Exception as e:
            print(f"❌ Error retrieving full document from Neo4j: {e}")
        
        return None
    
    def get_document_by_uuid(self, document_uid):
        """
        Retrieve a full document from Neo4j by its document UID.
        This is used for reference document retrieval from AI responses.
        
        Args:
            document_uid: UID of the document to retrieve
            
        Returns:
            dict: Document information with content, name, and metadata or None if not found
        """
        cache_key = f"doc_uuid_{document_uid}"
        if cache_key in self.document_cache:
            return self.document_cache[cache_key]
        
        try:
            print(f"🔍 Retrieving document by UUID: {document_uid}")
            
            # Query to get document info and all its chunks
            query = """
            MATCH (doc:Document {UID: $document_uid})
            OPTIONAL MATCH (doc)-[:CHUNK]->(chunk)
            WHERE chunk:Text_chunk OR chunk:Table_chunk
            RETURN doc.Name AS doc_name, doc.Type AS doc_type, doc.File AS doc_file,
                   collect(chunk.Text) AS chunk_texts, collect(chunk.Name) AS chunk_names,
                   count(chunk) AS chunk_count
            """
            
            result = self.session.run(query, {"document_uid": document_uid})
            record = result.single()
            
            if not record:
                print(f"⚠️  No document found with UID: {document_uid}")
                return None
            
            # Extract document info
            doc_info = {
                "uid": document_uid,
                "name": record["doc_name"],
                "type": record["doc_type"],
                "file": record["doc_file"],
                "chunk_count": record["chunk_count"]
            }
            
            # Combine all chunk texts to reconstruct the full document
            chunk_texts = [text for text in record["chunk_texts"] if text]
            if chunk_texts:
                full_content = "\n\n".join(chunk_texts)
                doc_info["content"] = full_content
                doc_info["content_length"] = len(full_content)
                
                # Cache the result
                self.document_cache[cache_key] = doc_info
                
                print(f"✅ Document retrieved: {doc_info['name']} ({doc_info['chunk_count']} chunks, {len(full_content)} chars)")
                return doc_info
            else:
                print(f"⚠️  Document found but no content chunks: {doc_info['name']}")
                return None
                
        except Exception as e:
            print(f"❌ Error retrieving document by UUID {document_uid}: {e}")
            return None
    
    def summarize_document(self, document_text, query_context, max_tokens=1500):
        """
        Summarize a full document with focus on the query context.
        
        Args:
            document_text: Full document text to summarize
            query_context: The user's query to focus the summary
            max_tokens: Maximum tokens for the summary
            
        Returns:
            str: Summarized document text
        """
        print(f"    🤖 Using small LLM (gpt-4o-mini) for document summarization")
        print(f"       • Input document length: {self.count_tokens(document_text):,} tokens")
        print(f"       • Target summary length: {max_tokens:,} tokens")
        print(f"       • Query-focused: '{query_context[:100]}{'...' if len(query_context) > 100 else ''}'")
        
        # Track usage if RAG instance is available
        if self.rag_instance:
            self.rag_instance.small_llm_usage["document_summarization"] += 1
            self.rag_instance.small_llm_usage["total_calls"] += 1
        
        # Check cache first
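        # Note: the key hashes only the first 500 characters of the document, so two
        # different documents sharing a 500-char prefix (for the same query) would
        # collide and reuse the cached summary.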
        cache_key = f"{hash(document_text[:500])}_{hash(query_context)}"
        if cache_key in self.summary_cache:
            print(f"       ✅ Using cached summary")
            return self.summary_cache[cache_key]
        
        # If document is already short enough, return as is
        if self.count_tokens(document_text) <= max_tokens:
            print(f"       ✅ Document already under token limit, using full text")
            return document_text
        
        # Create summarization prompt
        prompt = f"""
        You are a document summarization expert. Please summarize the following document with particular focus on information relevant to this query: "{query_context}"

        Instructions:
        - Maintain all key factual information relevant to the query
        - Preserve important technical details, numbers, and citations
        - Keep the summary under {max_tokens} tokens
        - Structure the summary logically with clear sections if applicable
        - Retain any critical background information needed to understand the main points

        Document to summarize:
        {document_text}

        Summary:
        """
        
        try:
            print(f"       🔄 Generating query-focused summary...")
            response = self.summarizer.invoke(prompt)
            summary = response.content.strip()
            summary_tokens = self.count_tokens(summary)
            print(f"       ✅ Summary generated: {summary_tokens:,} tokens ({100*summary_tokens/self.count_tokens(document_text):.1f}% of original)")
            
            # Cache the summary
            self.summary_cache[cache_key] = summary
            return summary
            
        except Exception as e:
            print(f"       ❌ Error summarizing document: {e}")
            print(f"       🔄 Fallback: truncating document to {max_tokens} tokens")
            # Fallback: truncate the document
            tokens = self.tokenizer.encode(document_text)
            if len(tokens) > max_tokens:
                truncated_tokens = tokens[:max_tokens]
                return self.tokenizer.decode(truncated_tokens)
            return document_text
    
    def remove_duplicate_documents(self, documents_with_metadata, similarity_threshold=0.85):
        """
        Remove duplicate documents based on content similarity.
        
        Args:
            documents_with_metadata: List of (metadata, document_text) tuples
            similarity_threshold: Threshold for considering documents as duplicates
            
        Returns:
            List of unique documents
        """
        if len(documents_with_metadata) <= 1:
            return documents_with_metadata
        
        unique_docs = []
        seen_embeddings = []
        
        try:
            # Use OpenAI embeddings for similarity comparison
            from openai import OpenAI
            client = OpenAI(api_key=self.api_key)
            
            for metadata, doc_text in documents_with_metadata:
                # Generate embedding for current document
                # text-embedding-3-small accepts at most 8191 input tokens
                if self.count_tokens(doc_text) > 8191:
                    # Use first part of document for embedding if too long
                    embed_text = self.tokenizer.decode(self.tokenizer.encode(doc_text)[:8191])
                else:
                    embed_text = doc_text
                
                response = client.embeddings.create(
                    model="text-embedding-3-small",
                    input=embed_text
                )
                current_embedding = np.array(response.data[0].embedding)
                
                # Normalize embedding
                current_embedding = current_embedding / np.linalg.norm(current_embedding)
                
                # Check for similarity with existing documents
                is_duplicate = False
                for seen_emb in seen_embeddings:
                    similarity = np.dot(current_embedding, seen_emb)
                    if similarity >= similarity_threshold:
                        is_duplicate = True
                        break
                
                if not is_duplicate:
                    unique_docs.append((metadata, doc_text))
                    seen_embeddings.append(current_embedding)
            
            return unique_docs
            
        except Exception as e:
            print(f"Error in duplicate removal: {e}")
            # Fallback: return all documents
            return documents_with_metadata
    
    def _get_detail_level_instructions(self, detail_level):
        """
        Get detail-level specific instructions for summarization.
        
        Args:
            detail_level: Summary, Balanced, Detailed, or Comprehensive
            
        Returns:
            tuple: (instruction_text, target_multiplier, expansion_guidance)
        """
        if detail_level == "Summary":
            return (
                """- Focus on key points, main findings, and essential conclusions
                - Use concise language while maintaining accuracy
                - Prioritize the most important information relevant to the query
                - Include only critical data points and major insights
                - Structure information clearly but without excessive detail""",
                0.7,  # Use 70% of target tokens
                "concise but comprehensive coverage of key points"
            )
        elif detail_level == "Balanced":
            return (
                """- Provide a well-balanced mix of overview and detail
                - Include important findings with supporting context
                - Balance breadth of coverage with depth of explanation
                - Include relevant data, methods, and key insights
                - Provide sufficient detail for understanding without overwhelming""",
                1.0,  # Use 100% of target tokens
                "balanced coverage with appropriate level of detail"
            )
        elif detail_level == "Detailed":
            return (
                """- Provide comprehensive coverage with extensive detail
                - Include detailed explanations of methods, procedures, and findings
                - Expand on technical specifications and quantitative data
                - Include background information and theoretical foundations
                - Provide extensive context and implications
                - Include specific examples, case studies, and applications""",
                1.3,  # Use 130% of target tokens (system will compress as needed)
                "comprehensive and detailed coverage with extensive explanations"
            )
        else:  # Comprehensive
            return (
                """- Provide exhaustive coverage of all relevant information
                - Include comprehensive explanations with maximum detail
                - Cover all methodological details, technical specifications, and data
                - Provide extensive background, context, and theoretical foundations
                - Include all examples, case studies, applications, and implications
                - Expand on every relevant concept with full explanations
                - Include detailed analysis and comprehensive interpretation""",
                1.5,  # Use 150% of target tokens (system will compress as needed)
                "exhaustive and comprehensive coverage with maximum detail"
            )
    
    def create_initial_summary(self, document_text, query_context, target_tokens=2000, detail_level="Balanced"):
        """
        Create an initial comprehensive summary from the first document.
        
        Args:
            document_text: Text of the first document
            query_context: The user's query for context
            target_tokens: Target length for the initial summary
            detail_level: Summary, Balanced, Detailed, or Comprehensive
            
        Returns:
            str: Initial comprehensive summary
        """
        print(f"    🤖 Using small LLM (gpt-4o-mini) for initial comprehensive summary")
        print(f"       • Creating foundation summary from first document")
        print(f"       • Detail level: {detail_level}")
        
        # Track usage if RAG instance is available
        if self.rag_instance:
            self.rag_instance.small_llm_usage["document_summarization"] += 1
            self.rag_instance.small_llm_usage["total_calls"] += 1
        
        # Get detail-level specific instructions
        detail_instructions, target_multiplier, expansion_guidance = self._get_detail_level_instructions(detail_level)
        adjusted_target = int(target_tokens * target_multiplier)
        
        # If document is short enough, use as foundation
        if self.count_tokens(document_text) <= adjusted_target:
            print(f"       ✅ Document under target length, using as foundation")
            return f"**Comprehensive Summary ({detail_level} level): {query_context}**\n\n{document_text}"
        
        prompt = f"""
        You are creating the initial comprehensive summary for the user's query. This will be the foundation that gets enriched with additional documents.

        User Query: "{query_context}"
        Detail Level: {detail_level}

        Instructions:
        - Create a structured, comprehensive summary that directly addresses the user's query
        - Organize information into clear sections with headers and subheadings
        - Use a format that can be easily expanded with additional information
        - CRITICAL: Aim for EXACTLY {adjusted_target} tokens - focus on {expansion_guidance}
        - Use comprehensive markdown formatting with multiple levels of structure
        
        Detail Level Specific Instructions:
        {detail_instructions}

        Document to summarize:
        {document_text}

        Comprehensive Summary ({detail_level} level, MUST be approximately {adjusted_target:,} tokens):
        """
        
        try:
            print(f"       🔄 Generating foundation summary...")
            response = self.summarizer.invoke(prompt)
            summary = response.content.strip()
            summary_tokens = self.count_tokens(summary)
            print(f"       ✅ Foundation summary created: {summary_tokens:,} tokens")
            return summary
            
        except Exception as e:
            print(f"       ❌ Error creating initial summary: {e}")
            # Fallback: truncate document
            tokens = self.tokenizer.encode(document_text)
            if len(tokens) > adjusted_target:
                truncated_tokens = tokens[:adjusted_target]
                return f"**Comprehensive Summary ({detail_level} level): {query_context}**\n\n" + self.tokenizer.decode(truncated_tokens)
            return f"**Comprehensive Summary ({detail_level} level): {query_context}**\n\n{document_text}"
    
    def enrich_summary(self, existing_summary, new_document, query_context, target_tokens=8000, detail_level="Balanced"):
        """
        Enrich an existing summary with information from a new document.
        Implements smart chunking for large documents to avoid token limit violations.
        
        Args:
            existing_summary: Current comprehensive summary
            new_document: New document text to integrate
            query_context: User's query for context
            target_tokens: Target length for enriched summary
            detail_level: Summary, Balanced, Detailed, or Comprehensive
            
        Returns:
            str: Enriched comprehensive summary
        """
        print(f"    🤖 Using small LLM (gpt-4o-mini) for summary enrichment")
        print(f"       • Integrating new document into existing summary")
        print(f"       • Detail level: {detail_level}")
        
        # Track usage if RAG instance is available
        if self.rag_instance:
            self.rag_instance.small_llm_usage["document_summarization"] += 1
            self.rag_instance.small_llm_usage["total_calls"] += 1
        
        # Get detail-level specific instructions
        detail_instructions, target_multiplier, expansion_guidance = self._get_detail_level_instructions(detail_level)
        adjusted_target = int(target_tokens * target_multiplier)
        
        existing_tokens = self.count_tokens(existing_summary)
        new_doc_tokens = self.count_tokens(new_document)
        
        print(f"       • Existing summary: {existing_tokens:,} tokens")
        print(f"       • New document: {new_doc_tokens:,} tokens")
        print(f"       • Target tokens: {adjusted_target:,}")
        
        # Calculate available space for expansion
        available_expansion = adjusted_target - existing_tokens
        print(f"       • Available expansion space: {available_expansion:,} tokens")
        
        # Check if input will exceed context limits for gpt-4o-mini (128k context)
        prompt_overhead = 1000  # Estimated prompt overhead
        max_context_mini = 125000  # Conservative limit for gpt-4o-mini
        total_input = existing_tokens + new_doc_tokens + prompt_overhead
        
        if total_input > max_context_mini:
            print(f"       ⚠️  Input too large ({total_input:,} tokens), implementing document chunking...")
            return self._enrich_with_chunking(existing_summary, new_document, query_context, adjusted_target, detail_level)
        
        prompt = f"""
        You are enriching a comprehensive summary with new information. Your task is to integrate the new document's relevant information into the existing summary while expanding its scope and detail.

        User Query: "{query_context}"
        Detail Level: {detail_level}

        Instructions:
        - EXPAND the existing summary by integrating relevant information from the new document
        - Add new sections if the new document covers different aspects
        - Enrich existing sections with additional details, data, and insights from the new document
        - Avoid redundancy - don't repeat information already covered
        - Maintain the structured, organized format
        - Prioritize information most relevant to the user's query
        - TARGET LENGTH: approximately {adjusted_target} tokens (focus on {expansion_guidance})
        - Preserve all important details, facts, figures, and specific findings
        - When adding information, be specific about sources and context
        
        Detail Level Specific Instructions:
        {detail_instructions}

        CURRENT COMPREHENSIVE SUMMARY ({existing_tokens:,} tokens):
        {existing_summary}

        NEW DOCUMENT TO INTEGRATE ({new_doc_tokens:,} tokens):
        {new_document}

        ENRICHED COMPREHENSIVE SUMMARY ({detail_level} level, target: ~{adjusted_target:,} tokens):
        """
        
        try:
            print(f"       🔄 Enriching summary with new information...")
            response = self.summarizer.invoke(prompt)
            enriched_summary = response.content.strip()
            enriched_tokens = self.count_tokens(enriched_summary)
            compression_ratio = (existing_tokens + new_doc_tokens) / enriched_tokens if enriched_tokens > 0 else 1
            print(f"       ✅ Summary enriched: {enriched_tokens:,} tokens (compression: {compression_ratio:.1f}x)")
            return enriched_summary
            
        except Exception as e:
            print(f"       ❌ Error enriching summary: {e}")
            # Fallback to chunking if regular approach fails
            print(f"       🔄 Falling back to document chunking...")
            return self._enrich_with_chunking(existing_summary, new_document, query_context, adjusted_target, detail_level)

    def _enrich_with_chunking(self, existing_summary, new_document, query_context, target_tokens, detail_level="Balanced"):
        """
        Enrich summary by processing large documents in chunks to avoid token limits.
        
        Args:
            existing_summary: Current summary
            new_document: Large document to chunk and process
            query_context: User's query for context
            target_tokens: Target output tokens
            detail_level: Summary, Balanced, Detailed, or Comprehensive
            
        Returns:
            str: Enriched summary
        """
        print(f"       📊 Chunking large document for processing")
        
        # Calculate chunk size based on available context
        existing_tokens = self.count_tokens(existing_summary)
        max_context = 125000
        prompt_overhead = 1000
        available_for_doc = max_context - existing_tokens - prompt_overhead
        
        # Split document into manageable chunks
        chunk_size = min(available_for_doc // 2, 40000)  # Conservative chunk size
        chunks = self._split_document_into_chunks(new_document, chunk_size)
        
        print(f"       📄 Split document into {len(chunks)} chunks (max {chunk_size:,} tokens each)")
        
        current_summary = existing_summary
        
        for i, chunk in enumerate(chunks):
            print(f"       🔄 Processing chunk {i+1}/{len(chunks)}")
            
            chunk_tokens = self.count_tokens(chunk)
            current_tokens = self.count_tokens(current_summary)
            
            # Check if we've reached target
            if current_tokens >= target_tokens:
                print(f"       🎯 Reached target tokens ({current_tokens:,}/{target_tokens:,}), stopping chunk processing")
                break
            
            # Process this chunk
            prompt = f"""
            You are enriching a comprehensive summary with a chunk of new information. Focus on integrating the most relevant and important information from this chunk.

            User Query: "{query_context}"

            Instructions:
            - EXPAND the existing summary by integrating relevant information from this document chunk
            - Add new sections or enrich existing sections as appropriate
            - Avoid redundancy with information already in the summary
            - Prioritize information most relevant to the user's query
            - TARGET LENGTH: approximately {target_tokens} tokens
            - Be comprehensive but avoid unnecessary repetition

            CURRENT COMPREHENSIVE SUMMARY ({current_tokens:,} tokens):
            {current_summary}

            DOCUMENT CHUNK TO INTEGRATE (chunk {i+1}/{len(chunks)}, {chunk_tokens:,} tokens):
            {chunk}

            ENRICHED COMPREHENSIVE SUMMARY (target: ~{target_tokens:,} tokens):
            """
            
            try:
                response = self.summarizer.invoke(prompt)
                current_summary = response.content.strip()
                new_tokens = self.count_tokens(current_summary)
                print(f"       ✅ Chunk processed: {new_tokens:,} tokens")
                
            except Exception as e:
                print(f"       ❌ Error processing chunk {i+1}: {e}")
                # Skip this chunk and continue
                continue
        
        final_tokens = self.count_tokens(current_summary)
        print(f"       ✅ Chunked processing complete: {final_tokens:,} tokens")
        return current_summary

    def _split_document_into_chunks(self, document, max_chunk_tokens):
        """
        Split a document into chunks of approximately max_chunk_tokens size.
        
        Args:
            document: Text to split
            max_chunk_tokens: Maximum tokens per chunk
            
        Returns:
            list: List of document chunks
        """
        # Use tiktoken for accurate token counting
        tokens = self.tokenizer.encode(document)
        
        chunks = []
        for i in range(0, len(tokens), max_chunk_tokens):
            chunk_tokens = tokens[i:i + max_chunk_tokens]
            chunk_text = self.tokenizer.decode(chunk_tokens)
            chunks.append(chunk_text)
        
        return chunks
    
    def consolidate_with_large_llm(self, summary, additional_documents, query_context, target_tokens=8000, detail_level="Balanced"):
        """
        Use the large LLM to consolidate a summary with a batch of additional documents.
        This is more efficient than incremental enrichment for larger batches.
        Implements dynamic batch sizing to respect token limits.
        
        Args:
            summary: Current summary to consolidate
            additional_documents: List of document texts to integrate
            query_context: User's query for context
            target_tokens: Target length for consolidated summary
            detail_level: Level of detail for the summary (Summary, Balanced, Detailed, Comprehensive)
            
        Returns:
            str: Consolidated comprehensive summary
        """
        print(f"    🧠 Using large LLM for batch consolidation")
        print(f"       • Consolidating summary with {len(additional_documents)} documents")
        
        # Track usage if RAG instance is available
        if self.rag_instance:
            self.rag_instance.large_llm_usage += 1
        
        summary_tokens = self.count_tokens(summary)
        
        # Check total token count and implement dynamic batching if necessary
        docs_text = "\n\n---DOCUMENT SEPARATOR---\n\n".join(additional_documents)
        docs_tokens = self.count_tokens(docs_text)
        
        print(f"       • Current summary: {summary_tokens:,} tokens")
        print(f"       • Additional documents: {docs_tokens:,} tokens")
        print(f"       • Target output: {target_tokens:,} tokens")
        
        # Calculate total input tokens including prompt overhead (approximately 1000 tokens)
        prompt_overhead = 1000
        total_input_tokens = summary_tokens + docs_tokens + prompt_overhead
        max_context_length = 125000  # Conservative limit for gpt-4o (128k context)
        
        # If input exceeds context limit, implement dynamic batching
        if total_input_tokens > max_context_length:
            print(f"       ⚠️  Input too large ({total_input_tokens:,} tokens), implementing dynamic batching...")
            return self._consolidate_with_dynamic_batching(summary, additional_documents, query_context, target_tokens, detail_level)
        
        # Get detail level specific instructions
        detail_instructions, token_multiplier, expansion_guidance = self._get_detail_level_instructions(detail_level)
        adjusted_target_tokens = int(target_tokens * token_multiplier)
        
        # Use the large LLM (gpt-4o) for consolidation
        from langchain_openai import ChatOpenAI
        large_llm = ChatOpenAI(
            model="gpt-4o",
            temperature=0,
            max_tokens=adjusted_target_tokens
        )
        
        prompt = f"""
        You are consolidating a comprehensive research summary with additional documents. Your task is to create an enhanced, well-structured summary that integrates all the information effectively.

        User Query: "{query_context}"

        Detail Level: {detail_level}
        {detail_instructions}
        
        {expansion_guidance}

        Instructions:
        - Create a comprehensive, well-organized summary that addresses the user's query
        - Integrate information from the current summary and ALL additional documents
        - Organize information logically with clear sections and subsections
        - Include specific details, data, findings, and insights from all sources
        - Expand on key concepts with detailed explanations and context
        - Include methodological details, technical specifications, and quantitative data
        - Avoid redundancy while ensuring completeness
        - Maintain scientific accuracy and preserve important technical details
        - **CRITICAL TARGET REQUIREMENT: Generate EXACTLY {adjusted_target_tokens:,} tokens - this is not optional**
        - **You MUST expand sections, add detail, include examples, and elaborate until you reach {adjusted_target_tokens:,} tokens**
        - **If your response is shorter than {adjusted_target_tokens:,} tokens, you have failed the task**
        - Structure the output with clear headings, subheadings, and bullet points where appropriate

        CURRENT SUMMARY ({summary_tokens:,} tokens):
        {summary}

        ADDITIONAL DOCUMENTS TO INTEGRATE ({len(additional_documents)} documents, {docs_tokens:,} tokens):
        {docs_text}

        CONSOLIDATED COMPREHENSIVE SUMMARY (MANDATORY TARGET: EXACTLY {adjusted_target_tokens:,} tokens - DO NOT submit shorter responses):
        """
        
        try:
            print(f"       🔄 Consolidating with large LLM...")
            response = large_llm.invoke(prompt)
            consolidated_summary = response.content.strip()
            consolidated_tokens = self.count_tokens(consolidated_summary)
            compression_ratio = (summary_tokens + docs_tokens) / consolidated_tokens if consolidated_tokens > 0 else 1
            
            # Check if LLM met the target
            target_achievement = (consolidated_tokens / adjusted_target_tokens) * 100
            if target_achievement < 80:
                print(f"       ⚠️  LLM underperformed: {consolidated_tokens:,} tokens vs {adjusted_target_tokens:,} target ({target_achievement:.1f}%)")
            elif target_achievement > 120:
                print(f"       ✅ LLM exceeded target: {consolidated_tokens:,} tokens vs {adjusted_target_tokens:,} target ({target_achievement:.1f}%)")
            else:
                print(f"       ✅ LLM met target: {consolidated_tokens:,} tokens vs {adjusted_target_tokens:,} target ({target_achievement:.1f}%)")
                
            print(f"       ✅ Summary consolidated: {consolidated_tokens:,} tokens (compression: {compression_ratio:.1f}x)")
            return consolidated_summary
            
        except Exception as e:
            print(f"       ❌ Error consolidating with large LLM: {e}")
            # Fallback to dynamic batching instead of simple incremental enrichment
            print(f"       🔄 Falling back to dynamic batching...")
            return self._consolidate_with_dynamic_batching(summary, additional_documents, query_context, target_tokens, detail_level)

    def _consolidate_with_dynamic_batching(self, summary, additional_documents, query_context, target_tokens=8000, detail_level="Balanced"):
        """
        Consolidate documents using dynamic batch sizing to respect token limits.
        
        Args:
            summary: Current summary
            additional_documents: List of document texts to integrate
            query_context: User's query for context
            target_tokens: Target length for consolidated summary
            detail_level: Level of detail for the summary (Summary, Balanced, Detailed, Comprehensive)
            
        Returns:
            str: Consolidated summary
        """
        print(f"       📊 Dynamic batching: Processing {len(additional_documents)} documents in token-aware batches")
        
        current_summary = summary
        summary_tokens = self.count_tokens(summary)
        
        i = 0
        batch_num = 0
        while i < len(additional_documents):
            # Determine optimal batch size based on current token counts
            batch_size = self._calculate_optimal_batch_size(
                current_summary, additional_documents[i:], target_tokens
            )
            
            # Process the batch
            batch_end = min(i + batch_size, len(additional_documents))
            batch = additional_documents[i:batch_end]
            
            print(f"       🔄 Dynamic batch {(i//batch_size)+1}: processing {len(batch)} documents (docs {i+1}-{batch_end})")
            
            if len(batch) == 1:
                # Single document: use enrichment instead of large LLM
                current_summary = self.enrich_summary(current_summary, batch[0], query_context, target_tokens, detail_level)
            else:
                # Multiple documents: try large LLM with current batch
                try:
                    batch_text = "\n\n---DOCUMENT SEPARATOR---\n\n".join(batch)
                    batch_tokens = self.count_tokens(batch_text)
                    current_tokens = self.count_tokens(current_summary)
                    
                    # Check if this batch will fit
                    total_input = current_tokens + batch_tokens + 1000  # 1000 for prompt overhead
                    if total_input > 125000:
                        print(f"       ⚠️  Batch still too large ({total_input:,} tokens), processing individually...")
                        # Process each document individually
                        for doc in batch:
                            current_summary = self.enrich_summary(current_summary, doc, query_context, target_tokens, detail_level)
                    else:
                        # Use large LLM for batch
                        current_summary = self._consolidate_batch_with_large_llm(
                            current_summary, batch, query_context, target_tokens, detail_level
                        )
                        
                except Exception as e:
                    print(f"       ❌ Error in dynamic batch processing: {e}")
                    # Fallback to individual processing
                    for doc in batch:
                        current_summary = self.enrich_summary(current_summary, doc, query_context, target_tokens, detail_level)
            
            i = batch_end
            
            # Check if we've reached target token size
            current_tokens = self.count_tokens(current_summary)
            if current_tokens >= target_tokens:
                print(f"       🎯 Reached target tokens ({current_tokens:,}/{target_tokens:,}), stopping dynamic batching")
                break
        
        return current_summary

    def _calculate_optimal_batch_size(self, current_summary, remaining_documents, target_tokens):
        """
        Calculate optimal batch size based on token limits and current state.
        
        Args:
            current_summary: Current summary text
            remaining_documents: List of remaining documents to process
            target_tokens: Target output tokens
            
        Returns:
            int: Optimal batch size
        """
        if not remaining_documents:
            return 0
            
        summary_tokens = self.count_tokens(current_summary)
        max_context = 125000  # Conservative limit
        prompt_overhead = 1000
        available_tokens = max_context - summary_tokens - prompt_overhead
        
        # Start with batch size of 1 and increase until we hit limits
        batch_size = 1
        cumulative_tokens = 0
        
        for i, doc in enumerate(remaining_documents):
            doc_tokens = self.count_tokens(doc)
            if cumulative_tokens + doc_tokens > available_tokens:
                break
            cumulative_tokens += doc_tokens
            batch_size = i + 1
            
            # Cap at reasonable batch size to avoid extremely long processing
            if batch_size >= 10:
                break
                
        return max(1, batch_size)  # Ensure at least 1 document

    def _consolidate_batch_with_large_llm(self, summary, batch_documents, query_context, target_tokens, detail_level="Balanced"):
        """
        Consolidate a specific batch with the large LLM.
        """
        # Get detail level specific instructions
        detail_instructions, token_multiplier, expansion_guidance = self._get_detail_level_instructions(detail_level)
        adjusted_target_tokens = int(target_tokens * token_multiplier)
        
        from langchain_openai import ChatOpenAI
        large_llm = ChatOpenAI(
            model="gpt-4o",
            temperature=0,
            max_tokens=adjusted_target_tokens
        )
        
        docs_text = "\n\n---DOCUMENT SEPARATOR---\n\n".join(batch_documents)
        
        prompt = f"""
        You are consolidating a comprehensive research summary with a batch of additional documents. Integrate all information effectively.

        User Query: "{query_context}"
        
        Detail Level: {detail_level}
        {detail_instructions}
        
        {expansion_guidance}

        Instructions:
        - Enhance the existing summary with information from ALL batch documents
        - Organize information logically with clear sections and subsections
        - Include specific details, data, findings, and insights from all sources
        - Avoid redundancy while ensuring completeness
        - IMPORTANT: Aim for approximately {adjusted_target_tokens} tokens - be comprehensive and detailed
        - Structure with clear headings and formatting

        CURRENT SUMMARY:
        {summary}

        BATCH DOCUMENTS TO INTEGRATE:
        {docs_text}

        ENHANCED COMPREHENSIVE SUMMARY:
        """
        
        response = large_llm.invoke(prompt)
        return response.content.strip()
    
    def consolidate_with_large_llm_and_citations(self, summary, additional_documents, document_metadata, query_context, target_tokens=8000, start_block_num=1, use_inline_citations=True):
        """
        Use the large LLM to consolidate a summary with a batch of additional documents, with optional citations.
        
        Args:
            summary: Current summary to consolidate
            additional_documents: List of document texts to integrate
            document_metadata: List of metadata for each document
            query_context: User's query for context
            target_tokens: Target length for consolidated summary
            start_block_num: Starting block number for citations
            use_inline_citations: Whether to include inline citations (False for extensive search mode)
            
        Returns:
            str: Consolidated comprehensive summary with or without inline citations
        """
        print(f"    🧠 Using large LLM for batch consolidation {'with citations' if use_inline_citations else 'without citations'}")
        print(f"       • Consolidating summary with {len(additional_documents)} documents")
        
        # Track usage if RAG instance is available
        if self.rag_instance:
            self.rag_instance.large_llm_usage += 1
        
        summary_tokens = self.count_tokens(summary) if summary else 0
        
        # Prepare documents - with or without block numbers for citation
        if use_inline_citations:
            numbered_documents = []
            block_nums = []
            for i, (doc, metadata) in enumerate(zip(additional_documents, document_metadata)):
                # Calculate the actual block number that will be used for this document
                # The first block is the comprehensive summary, then individual sources follow
                block_num = start_block_num + 1 + i  # +1 because comprehensive summary takes the first block
                block_nums.append(block_num)
                title = metadata.get('title', f"Document {i+1}")
                numbered_doc = f"**[block {block_num}] {title}**\n{doc}"
                numbered_documents.append(numbered_doc)
        else:
            # For extensive search mode: simple document preparation without block numbers
            numbered_documents = []
            block_nums = []
            for i, (doc, metadata) in enumerate(zip(additional_documents, document_metadata)):
                title = metadata.get('title', f"Document {i+1}")
                numbered_doc = f"**{title}**\n{doc}"
                numbered_documents.append(numbered_doc)
        
        docs_text = "\n\n---DOCUMENT SEPARATOR---\n\n".join(numbered_documents)
        docs_tokens = self.count_tokens(docs_text)
        
        print(f"       • Current summary: {summary_tokens:,} tokens")
        print(f"       • Additional documents: {docs_tokens:,} tokens")
        print(f"       • Target output: {target_tokens:,} tokens")
        if use_inline_citations:
            print(f"       • Block numbers: {block_nums}")
        
        # Use the large LLM (gpt-4o) for consolidation
        from langchain_openai import ChatOpenAI
        large_llm = ChatOpenAI(
            model="gpt-4o",
            temperature=0,
            max_tokens=target_tokens
        )
        
        current_summary_part = f"\n\nCURRENT SUMMARY ({summary_tokens:,} tokens):\n{summary}" if summary else ""
        
        if use_inline_citations:
            # Standard mode with inline citations
            prompt = f"""
        You are consolidating a comprehensive research summary with additional documents. Your task is to create an enhanced, well-structured summary that integrates all the information effectively WITH PROPER INLINE CITATIONS.

        User Query: "{query_context}"

        CRITICAL CITATION INSTRUCTIONS:
        - You MUST include inline citations using the block numbers provided: [block X]
        - When you reference information from a document, immediately cite it: [block X]
        - Multiple sources for the same point: [block X, block Y]
        - Every factual claim, data point, or specific finding MUST be cited
        - Citations should be placed at the end of sentences or claims they support

        CONTENT INSTRUCTIONS:
        - Create a comprehensive, well-organized summary that addresses the user's query
        - Integrate information from the current summary and ALL additional documents
        - Organize information logically with clear sections and subsections
        - Include specific details, data, findings, and insights from all sources
        - Expand on key concepts with detailed explanations and context
        - Include methodological details, technical specifications, and quantitative data
        - Avoid redundancy while ensuring completeness
        - Maintain scientific accuracy and preserve important technical details
        - IMPORTANT: Aim for EXACTLY {target_tokens} tokens - be comprehensive and detailed to reach this target
        - Structure the output with clear headings, subheadings, and bullet points where appropriate{current_summary_part}

        ADDITIONAL DOCUMENTS TO INTEGRATE ({len(additional_documents)} documents, {docs_tokens:,} tokens):
        {docs_text}

        CONSOLIDATED COMPREHENSIVE SUMMARY WITH CITATIONS (MUST be approximately {target_tokens:,} tokens):
        """
        else:
            # Extensive search mode: Generate citations for tracking but will strip them later
            prompt = f"""
        You are consolidating a comprehensive research summary with additional documents. Your task is to create an enhanced, well-structured summary that integrates all the information effectively WITH PROPER INLINE CITATIONS for source tracking.

        User Query: "{query_context}"

        CRITICAL CITATION INSTRUCTIONS:
        - You MUST include inline citations using the block numbers provided: [block X]
        - When you reference information from a document, immediately cite it: [block X]
        - Multiple sources for the same point: [block X, block Y]
        - Every factual claim, data point, or specific finding MUST be cited
        - Citations should be placed at the end of sentences or claims they support
        - These citations will be used for source tracking and then processed for final formatting

        CONTENT INSTRUCTIONS:
        - Create a comprehensive, well-organized summary that addresses the user's query
        - Integrate information from the current summary and ALL additional documents
        - Organize information logically with clear sections and subsections
        - Include specific details, data, findings, and insights from all sources
        - Expand on key concepts with detailed explanations and context
        - Include methodological details, technical specifications, and quantitative data
        - Avoid redundancy while ensuring completeness
        - Maintain scientific accuracy and preserve important technical details
        - IMPORTANT: Aim for EXACTLY {target_tokens} tokens - be comprehensive and detailed to reach this target
        - Structure the output with clear headings, subheadings, and bullet points where appropriate{current_summary_part}

        ADDITIONAL DOCUMENTS TO INTEGRATE ({len(additional_documents)} documents, {docs_tokens:,} tokens):
        {docs_text}

        CONSOLIDATED COMPREHENSIVE SUMMARY WITH CITATIONS (MUST be approximately {target_tokens:,} tokens):
        """
        
        try:
            print(f"       🔄 Consolidating with large LLM {'and citations' if use_inline_citations else 'with citations for tracking (will be processed)'}...")
            response = large_llm.invoke(prompt)
            consolidated_summary = response.content.strip()
            
            # In extensive search mode: extract citations and clean text
            if not use_inline_citations:
                print(f"       🔧 Processing citations for extensive search mode...")
                # Extract all citations from the text for source tracking; the pattern
                # also matches multi-source citations like "[block 3, block 7]"
                import re
                citation_pattern = r'\[block \d+(?:,\s*block \d+)*\]'
                citations_found = re.findall(citation_pattern, consolidated_summary)
                unique_citations = list(set(citations_found))
                print(f"       📋 Citations found for source tracking: {len(unique_citations)} unique citations")
                
                # Strip all citations from the text for clean reading
                clean_summary = re.sub(citation_pattern, '', consolidated_summary)
                # Collapse runs of spaces left by citation removal (spaces and tabs only,
                # so newlines and markdown structure are preserved)
                clean_summary = re.sub(r'[ \t]{2,}', ' ', clean_summary)
                # Clean up spacing before punctuation
                clean_summary = re.sub(r'[ \t]+([.,;:])', r'\1', clean_summary)
                
                consolidated_summary = clean_summary.strip()
                print(f"       ✅ Citations extracted and text cleaned for extensive search mode")
                
                # Store citation info for reference building (if needed by calling code)
                if hasattr(response, 'citations_extracted'):
                    response.citations_extracted = unique_citations
                else:
                    # Append the citation list as an HTML comment so calling code can recover it from the text
                    consolidated_summary = consolidated_summary + f"\n\n<!-- CITATIONS_EXTRACTED: {','.join(unique_citations)} -->"
            
            consolidated_tokens = self.count_tokens(consolidated_summary)
            compression_ratio = (summary_tokens + docs_tokens) / consolidated_tokens if consolidated_tokens > 0 else 1
            print(f"       ✅ Summary consolidated {'with citations' if use_inline_citations else 'and cleaned'}: {consolidated_tokens:,} tokens (compression: {compression_ratio:.1f}x)")
            return consolidated_summary
            
        except Exception as e:
            print(f"       ❌ Error consolidating with large LLM: {e}")
            # Fallback to simple concatenation
            if use_inline_citations:
                # Fallback with basic citations
                if summary:
                    fallback = f"{summary}\n\n**Additional Information:**\n"
                else:
                    fallback = "**Comprehensive Summary:**\n\n"
                
                for i, doc in enumerate(additional_documents):
                    block_num = start_block_num + i
                    title = document_metadata[i].get('title', f'Document {i+1}')
                    fallback += f"\n**From {title} [block {block_num}]:**\n{doc[:500]}{'...' if len(doc) > 500 else ''}\n"
            else:
                # Fallback without citations (extensive search mode)
                if summary:
                    fallback = f"{summary}\n\n**Additional Information:**\n"
                else:
                    fallback = "**Comprehensive Summary:**\n\n"
                
                for i, doc in enumerate(additional_documents):
                    title = document_metadata[i].get('title', f'Document {i+1}')
                    fallback += f"\n**From {title}:**\n{doc[:500]}{'...' if len(doc) > 500 else ''}\n"
            
            return fallback
    
    def process_documents_with_source_tracking(self, documents, query_context, target_summary_tokens=8000, batch_size=10, use_inline_citations=False, disable_citations=False, detail_level="Balanced"):
        """
        Process documents with improved source tracking and batch processing using large LLM.
        
        Args:
            documents: List of (metadata, content) tuples
            query_context: User's query for context
            target_summary_tokens: Target length for final summary
            batch_size: Number of documents to process in each batch with large LLM
            use_inline_citations: Whether to include inline citations (False for extensive search mode)
            disable_citations: Whether to completely disable citation logic (True for extensive search mode)
            detail_level: Level of detail for the summary (Summary, Balanced, Detailed, Comprehensive)
            
        Returns:
            tuple: (comprehensive_summary, source_mapping, individual_summaries)
        """
        print(f"    📚 Processing {len(documents)} documents with source tracking")
        print(f"       • Batch size: {batch_size} documents per large LLM call")
        print(f"       • Target summary tokens: {target_summary_tokens:,}")
        if disable_citations:
            print(f"       • Citations: Completely disabled (extensive search mode)")
        else:
            print(f"       • Inline citations: {'Enabled' if use_inline_citations else 'Disabled (extensive search mode)'}")
        
        source_mapping = {}  # Maps content sections to source documents
        individual_summaries = []  # List of individual document summaries
        comprehensive_summary = ""
        doc_batch = []
        doc_metadata_batch = []
        documents_processed = 0
        
        for i, (metadata, content) in enumerate(documents):
            documents_processed += 1
            doc_batch.append(content)
            doc_metadata_batch.append(metadata)
            
            # Store individual document info for source tracking
            doc_id = f"doc_{i+1}"
            individual_summaries.append({
                'id': doc_id,
                'metadata': metadata,
                'content_preview': content[:200] + "..." if len(content) > 200 else content
            })
            
            # Process batch when we reach batch_size or end of documents
            if len(doc_batch) >= batch_size or i == len(documents) - 1:
                print(f"       🔄 Processing batch {(i//batch_size)+1}: documents {i+2-len(doc_batch)} to {i+1}")
                
                if comprehensive_summary == "":
                    # First batch: create initial comprehensive summary with generous allocation
                    if len(doc_batch) == 1:
                        # Single document: use create_initial_summary with generous initial size
                        # For single docs, use 90% of target, minimum 1500
                        initial_target = max(1500, int(target_summary_tokens * 0.9))
                        print(f"       • Single document target: {initial_target:,} tokens (from user target: {target_summary_tokens:,})")
                        comprehensive_summary = self.create_initial_summary(
                            doc_batch[0], 
                            query_context,
                            target_tokens=initial_target,
                            detail_level=detail_level
                        )
                    else:
                        # Multiple documents: use large LLM for batch processing with generous initial target
                        # Make the algorithm more responsive to user's target token request
                        # For targets <= 6000: use 80% of target, minimum 2000
                        # For targets > 6000: use 70% of target, minimum 3000
                        if target_summary_tokens <= 6000:
                            initial_target = max(2000, int(target_summary_tokens * 0.8))
                        else:
                            initial_target = max(3000, int(target_summary_tokens * 0.7))
                        
                        print(f"       • Target output: {initial_target:,} tokens (from user target: {target_summary_tokens:,})")
                        if disable_citations:
                            # Use simple consolidation without citations
                            comprehensive_summary = self.consolidate_with_large_llm(
                                "", 
                                doc_batch, 
                                query_context,
                                target_tokens=initial_target,
                                detail_level=detail_level
                            )
                        else:
                            # Use citation-enabled consolidation
                            comprehensive_summary = self.consolidate_with_large_llm_and_citations(
                                "", 
                                doc_batch, 
                                doc_metadata_batch,
                                query_context,
                                target_tokens=initial_target,
                                start_block_num=self.rag_instance.block_counter if self.rag_instance else 1,
                                use_inline_citations=use_inline_citations
                            )
                else:
                    # Subsequent batches: consolidate with existing summary
                    if disable_citations:
                        # Use simple consolidation without citations
                        comprehensive_summary = self.consolidate_with_large_llm(
                            comprehensive_summary,
                            doc_batch,
                            query_context,
                            target_tokens=target_summary_tokens,
                            detail_level=detail_level
                        )
                    else:
                        # Use citation-enabled consolidation
                        comprehensive_summary = self.consolidate_with_large_llm_and_citations(
                            comprehensive_summary,
                            doc_batch,
                            doc_metadata_batch,
                            query_context,
                            target_tokens=target_summary_tokens,
                            start_block_num=self.rag_instance.block_counter if self.rag_instance else 1,
                            use_inline_citations=use_inline_citations
                        )
                
                # Track which documents contributed to current summary section
                batch_start = i + 2 - len(doc_batch)
                batch_end = i + 1
                source_mapping[f"batch_{(i//batch_size)+1}"] = {
                    'documents': list(range(batch_start, batch_end + 1)),
                    'metadata': doc_metadata_batch.copy()
                }
                
                # Clear batch for next iteration
                doc_batch = []
                doc_metadata_batch = []
                
                print(f"         ✅ Batch processed: {self.count_tokens(comprehensive_summary):,} tokens")
                
                # More aggressive termination logic to reach target size
                current_tokens = self.count_tokens(comprehensive_summary)
                
                # Only stop if we've truly exceeded target by a reasonable margin OR processed all documents
                if current_tokens >= target_summary_tokens * 1.1:  # 110% of target
                    print(f"         🎯 Significantly exceeded target summary size ({current_tokens:,} > {target_summary_tokens * 1.1:,.0f}) with {documents_processed} documents")
                    break
                elif documents_processed >= len(documents):
                    print(f"         📋 All documents processed ({documents_processed}/{len(documents)}) - final size: {current_tokens:,} tokens")
                    break
                elif current_tokens >= target_summary_tokens * 0.98 and documents_processed >= len(documents) * 0.8:
                    print(f"         🎯 Near target size ({current_tokens:,}/{target_summary_tokens:,}) and processed most documents ({documents_processed}/{len(documents)})")
                    break
        
        print(f"     ✅ Document processing complete:")
        print(f"       • Documents processed: {documents_processed}/{len(documents)}")
        print(f"       • Final summary: {self.count_tokens(comprehensive_summary):,} tokens")
        print(f"       • Source batches: {len(source_mapping)}")
        
        return comprehensive_summary, source_mapping, individual_summaries
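
The citation-handling path in consolidate_with_large_llm_and_citations can be exercised in isolation. The following is a minimal sketch of the extract-then-strip step using the same [block N] pattern as the source; the sample text is illustrative:

import re

text = "Alpha binds beta [block 3]. Gamma inhibits delta [block 4] , as reported."
citations = sorted(set(re.findall(r"\[block \d+\]", text)))   # ['[block 3]', '[block 4]']
clean = re.sub(r"\[block \d+\]", "", text)                    # strip citations
clean = re.sub(r"[ \t]{2,}", " ", clean)                      # collapse leftover spaces
clean = re.sub(r"[ \t]+([.,;:])", r"\1", clean)               # fix space before punctuation
print(citations, "|", clean.strip())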

Parameters

Name Type Default Kind
session - - positional
chroma_client - - positional
api_key str "" keyword
rag_instance - None keyword

Parameter Details

session: Neo4j session used for document and chunk retrieval queries
chroma_client: ChromaDB client used to access vector collections
api_key: OpenAI API key passed to the summarizer LLM (default: "")
rag_instance: Optional reference to the main RAG instance, used for usage tracking (default: None)

Return Value

Instantiating the class returns an ExtensiveSearchManager instance.

Class Interface

Methods

__init__(self, session, chroma_client, api_key, rag_instance)

Purpose: Initialize the manager with database handles, set up the summarizer LLM and tokenizer, and create the document and summary caches.

Parameters:

  • session: Neo4j session used for document retrieval queries
  • chroma_client: ChromaDB client for vector collection access
  • api_key: OpenAI API key for the summarizer (default: "")
  • rag_instance: Optional reference to the main RAG instance for usage tracking (default: None)

Returns: None

count_tokens(self, text)

Purpose: Count tokens in text using the cl100k_base tiktoken encoding.

Parameters:

  • text: Text to tokenize

Returns: int: Number of tokens in the text
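
Since the manager counts tokens with tiktoken's cl100k_base encoding, the behavior can be reproduced standalone; a minimal sketch:

import tiktoken

tokenizer = tiktoken.get_encoding("cl100k_base")
text = "Extensive search consolidates many documents into one summary."
print(len(tokenizer.encode(text)))   # token count, as count_tokens() would return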

get_full_document(self, chunk_metadata, collection_name)

Purpose: Retrieve the full document that a chunk belongs to by using Neo4j to find all chunks from the same document and reconstruct the complete document.

Parameters:

  • chunk_metadata: Metadata from the chunk containing the bibtex path or document info
  • collection_name: Name of the ChromaDB collection

Returns: str: Full document text ordered by chunk sequence, or None if not found
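
A hypothetical call, assuming a configured manager and chunk metadata returned by a ChromaDB query; the 'bibtex' key matches the metadata field the method reads, while the path and collection name are illustrative:

# Illustrative only: 'manager', the bibtex path, and the collection name are assumptions.
chunk_metadata = {"bibtex": "library/some_paper.pdf"}
full_text = manager.get_full_document(chunk_metadata, collection_name="research_docs")
if full_text is None:
    print("No matching document found in Neo4j")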

get_full_document_neo4j(self, chunk_uid)

Purpose: Retrieve the full document from Neo4j that a chunk belongs to. Updated to match the actual Neo4j schema from offline_docstore_multi_vice.py.

Parameters:

  • chunk_uid: UID of the text/table chunk

Returns: str: Full document text ordered by chunk sequence, or None if not found

get_document_by_uuid(self, document_uid)

Purpose: Retrieve a full document from Neo4j by its document UID. Used for reference document retrieval from AI responses.

Parameters:

  • document_uid: UID of the document to retrieve

Returns: dict: Document information with content, name, and metadata, or None if not found
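
A hypothetical retrieval by document UID, e.g. when resolving a reference found in an AI response; the UID value is illustrative, and the dict keys are assumed from the docstring's description:

# Illustrative only: the UID and the result keys are assumptions.
doc = manager.get_document_by_uuid("4f1c2d3e-...")
if doc is not None:
    print(doc["name"], len(doc["content"]))   # keys assumed ('content', 'name')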

summarize_document(self, document_text, query_context, max_tokens)

Purpose: Summarize a full document with focus on the query context.

Parameters:

  • document_text: Full document text to summarize
  • query_context: The user's query to focus the summary
  • max_tokens: Maximum tokens for the summary

Returns: str: Summarized document text
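
A hypothetical query-focused summarization call (the document text and query are placeholders):

# Illustrative only: inputs are placeholders.
summary = manager.summarize_document(
    document_text=full_text,
    query_context="What extraction methods were used?",
    max_tokens=1500,
)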

remove_duplicate_documents(self, documents_with_metadata, similarity_threshold)

Purpose: Remove duplicate documents based on content similarity.

Parameters:

  • documents_with_metadata: List of (metadata, document_text) tuples
  • similarity_threshold: Threshold above which two documents are considered duplicates

Returns: list: Unique documents
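
The exact similarity measure is internal to the method; as a rough standalone sketch of threshold-based deduplication, here using difflib, which may differ from the class's actual metric:

from difflib import SequenceMatcher

def dedup_sketch(documents_with_metadata, similarity_threshold=0.9):
    # Keep a document only if it is not too similar to any already-kept one.
    unique = []
    for metadata, text in documents_with_metadata:
        if all(SequenceMatcher(None, text, kept_text).ratio() < similarity_threshold
               for _, kept_text in unique):
            unique.append((metadata, text))
    return unique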

_get_detail_level_instructions(self, detail_level)

Purpose: Get detail-level specific instructions for summarization.

Parameters:

  • detail_level: One of Summary, Balanced, Detailed, or Comprehensive

Returns: tuple: (instruction_text, target_multiplier, expansion_guidance)

create_initial_summary(self, document_text, query_context, target_tokens, detail_level)

Purpose: Create an initial comprehensive summary from the first document.

Parameters:

  • document_text: Text of the first document
  • query_context: The user's query for context
  • target_tokens: Target length for the initial summary
  • detail_level: One of Summary, Balanced, Detailed, or Comprehensive

Returns: str: Initial comprehensive summary

enrich_summary(self, existing_summary, new_document, query_context, target_tokens, detail_level)

Purpose: Enrich an existing summary with information from a new document. Implements smart chunking for large documents to avoid token limit violations.

Parameters:

  • existing_summary: Current comprehensive summary
  • new_document: New document text to integrate
  • query_context: User's query for context
  • target_tokens: Target length for the enriched summary
  • detail_level: One of Summary, Balanced, Detailed, or Comprehensive

Returns: str: Enriched comprehensive summary
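
Taken together with create_initial_summary, this supports an incremental pipeline; a hypothetical loop (all names are assumptions):

# Illustrative only: 'manager', 'docs', and 'query' are assumptions.
summary = manager.create_initial_summary(docs[0], query, target_tokens=2000, detail_level="Balanced")
for doc in docs[1:]:
    summary = manager.enrich_summary(summary, doc, query, target_tokens=4000, detail_level="Balanced")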

_enrich_with_chunking(self, existing_summary, new_document, query_context, target_tokens, detail_level)

Purpose: Enrich a summary by processing large documents in chunks to avoid token limits.

Parameters:

  • existing_summary: Current summary
  • new_document: Large document to chunk and process
  • query_context: User's query for context
  • target_tokens: Target output tokens
  • detail_level: One of Summary, Balanced, Detailed, or Comprehensive

Returns: str: Enriched summary

_split_document_into_chunks(self, document, max_chunk_tokens)

Purpose: Split a document into chunks of approximately max_chunk_tokens each.

Parameters:

  • document: Text to split
  • max_chunk_tokens: Maximum tokens per chunk

Returns: list: Document chunks
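
Token-bounded splitting can be approximated with tiktoken alone; a minimal sketch (the method's actual boundary handling may differ, e.g. splitting on sentence or paragraph boundaries):

import tiktoken

enc = tiktoken.get_encoding("cl100k_base")

def split_sketch(document, max_chunk_tokens=2000):
    tokens = enc.encode(document)
    return [enc.decode(tokens[i:i + max_chunk_tokens])
            for i in range(0, len(tokens), max_chunk_tokens)]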

consolidate_with_large_llm(self, summary, additional_documents, query_context, target_tokens, detail_level)

Purpose: Use the large LLM to consolidate a summary with a batch of additional documents. More efficient than incremental enrichment for larger batches; implements dynamic batch sizing to respect token limits.

Parameters:

  • summary: Current summary to consolidate
  • additional_documents: List of document texts to integrate
  • query_context: User's query for context
  • target_tokens: Target length for the consolidated summary
  • detail_level: One of Summary, Balanced, Detailed, or Comprehensive

Returns: str: Consolidated comprehensive summary

_consolidate_with_dynamic_batching(self, summary, additional_documents, query_context, target_tokens, detail_level)

Purpose: Consolidate documents using dynamic batch sizing to respect token limits.

Parameters:

  • summary: Current summary
  • additional_documents: List of document texts to integrate
  • query_context: User's query for context
  • target_tokens: Target length for the consolidated summary
  • detail_level: One of Summary, Balanced, Detailed, or Comprehensive

Returns: str: Consolidated summary

_calculate_optimal_batch_size(self, current_summary, remaining_documents, target_tokens)

Purpose: Calculate the optimal batch size based on token limits and current state.

Parameters:

  • current_summary: Current summary text
  • remaining_documents: List of remaining documents to process
  • target_tokens: Target output tokens

Returns: int: Optimal batch size
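
The core idea is budget arithmetic: fit as many documents as possible into the context window after reserving room for the current summary, the target output, and the prompt itself. A sketch under assumed limits (the 128,000-token window and 1,500-token overhead are assumptions, not values from the source):

def batch_size_sketch(summary_tokens, doc_token_counts, target_tokens,
                      context_limit=128_000, prompt_overhead=1_500):
    budget = context_limit - summary_tokens - target_tokens - prompt_overhead
    size = used = 0
    for n in doc_token_counts:          # doc_token_counts: tokens per remaining document
        if used + n > budget:
            break
        size += 1
        used += n
    return max(1, size)                 # always process at least one document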

_consolidate_batch_with_large_llm(self, summary, batch_documents, query_context, target_tokens, detail_level)

Purpose: Consolidate a specific batch with the large LLM.

Parameters:

  • summary: Parameter
  • batch_documents: Parameter
  • query_context: Parameter
  • target_tokens: Parameter
  • detail_level: Parameter

Returns: str: Consolidated summary (not documented in the docstring; inferred from the sibling consolidation methods)

consolidate_with_large_llm_and_citations(self, summary, additional_documents, document_metadata, query_context, target_tokens, start_block_num, use_inline_citations)

Purpose: Use the large LLM to consolidate a summary with a batch of additional documents, with optional inline citations.

Parameters:

  • summary: Current summary to consolidate
  • additional_documents: List of document texts to integrate
  • document_metadata: List of metadata for each document
  • query_context: User's query for context
  • target_tokens: Target length for the consolidated summary
  • start_block_num: Starting block number for citations
  • use_inline_citations: Whether to include inline citations (False for extensive search mode)

Returns: str: Consolidated comprehensive summary, with or without inline citations

process_documents_with_source_tracking(self, documents, query_context, target_summary_tokens, batch_size, use_inline_citations, disable_citations, detail_level)

Purpose: Process documents with improved source tracking and batch processing using the large LLM.

Parameters:

  • documents: List of (metadata, content) tuples
  • query_context: User's query for context
  • target_summary_tokens: Target length for the final summary
  • batch_size: Number of documents to process in each large-LLM batch
  • use_inline_citations: Whether to include inline citations (False for extensive search mode)
  • disable_citations: Whether to completely disable citation logic (True for extensive search mode)
  • detail_level: One of Summary, Balanced, Detailed, or Comprehensive

Returns: tuple: (comprehensive_summary, source_mapping, individual_summaries)
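
A hypothetical end-to-end call in extensive search mode (the manager and retrieved documents are assumed to exist):

# Illustrative only: 'manager' and 'retrieved_docs' are assumptions.
summary, source_mapping, individual_summaries = manager.process_documents_with_source_tracking(
    documents=retrieved_docs,            # list of (metadata, content) tuples
    query_context="Summarize the reported methodologies",
    target_summary_tokens=8000,
    batch_size=10,
    use_inline_citations=False,
    disable_citations=True,              # extensive search mode
    detail_level="Balanced",
)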

Required Imports

from typing import List
from typing import Any
from typing import Dict
import os
import panel as pn
import re
import tiktoken
# ChatOpenAI is also required by the summarizer setup; the exact import path
# (e.g. from langchain_openai import ChatOpenAI) depends on this codebase's LangChain version.

Usage Example

# Example usage (constructor signature from __init__; session and chroma_client
# must already be configured):
# manager = ExtensiveSearchManager(session, chroma_client, api_key="...", rag_instance=None)
# summary, sources, summaries = manager.process_documents_with_source_tracking(documents, query)
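
A fuller setup sketch, assuming a local Neo4j instance and an on-disk ChromaDB store; the URI, credentials, and path are placeholders, and the neo4j/chromadb calls follow those libraries' standard client APIs:

from neo4j import GraphDatabase
import chromadb

driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
chroma_client = chromadb.PersistentClient(path="./chroma_db")

with driver.session() as session:
    manager = ExtensiveSearchManager(session, chroma_client, api_key="...", rag_instance=None)
    # ... extensive search calls go here ...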

Similar Components

AI-powered semantic similarity - components with related functionality:

  • class ExtensiveSearchManager 99.0% similar

    Manages extensive search functionality including full document retrieval, summarization, and enhanced context gathering.

    From: /tf/active/vicechatdev/OneCo_hybrid_RAG.py
  • function extensive_mode_example 53.0% similar

    Demonstrates the usage of DocChatRAG's extensive mode for detailed document analysis with a sample query about methodologies.

    From: /tf/active/vicechatdev/docchat/example_usage.py
  • class ReferenceManager_v4 51.5% similar

    Manages extraction and formatting of references for LLM chat responses. Handles both file references and BibTeX citations, formatting them according to various academic citation styles.

    From: /tf/active/vicechatdev/OneCo_hybrid_RAG.py
  • class ReferenceManager_v3 51.4% similar

    Manages extraction and formatting of references for LLM chat responses. Handles both file references and BibTeX citations, formatting them according to various academic citation styles.

    From: /tf/active/vicechatdev/OneCo_hybrid_RAG_old.py
  • class ReferenceManager_v2 50.6% similar

    Manages extraction and formatting of references for LLM chat responses. Handles both file references and BibTeX citations, formatting them according to various academic citation styles.

    From: /tf/active/vicechatdev/OneCo_hybrid_RAG copy.py