class ExtensiveSearchManager_v1
Manages extensive search functionality including full document retrieval, summarization, and enhanced context gathering.
Source file: /tf/active/vicechatdev/vice_ai/hybrid_rag_engine.py
Lines: 4887 - 6205
Complexity: moderate
Purpose
Manages extensive search functionality including full document retrieval, summarization, and enhanced context gathering.
Source Code
class ExtensiveSearchManager:
"""
Manages extensive search functionality including full document retrieval,
summarization, and enhanced context gathering.
"""
def __init__(self, session, chroma_client, api_key="", rag_instance=None):
self.session = session
self.chroma_client = chroma_client
self.api_key = api_key
self.rag_instance = rag_instance # Reference to main RAG instance for usage tracking
self.summarizer = ChatOpenAI(
model="gpt-4o-mini",
temperature=0,
max_tokens=2000,
api_key=api_key
)
# Set up tokenizer for counting
self.tokenizer = tiktoken.get_encoding("cl100k_base")
# Initialize caches
self.document_cache = {}
self.summary_cache = {}
def count_tokens(self, text):
"""Count tokens in text"""
return len(self.tokenizer.encode(text))
def get_full_document(self, chunk_metadata, collection_name):
"""
Retrieve the full document that a chunk belongs to by using Neo4j to find all chunks
from the same document and reconstruct the complete document.
Args:
chunk_metadata: Metadata from the chunk containing bibtex path or document info
collection_name: Name of the ChromaDB collection
Returns:
str: Full document text ordered by chunk sequence or None if not found
"""
# Extract bibtex path or try to find document identifier
bibtex_path = chunk_metadata.get('bibtex', '')
if not bibtex_path:
return None
# Create cache key
cache_key = f"{collection_name}_{bibtex_path}"
if cache_key in self.document_cache:
return self.document_cache[cache_key]
try:
# The bibtex path in metadata should correspond to the document path
# First, try to find the document directly by path
query = """
MATCH (d:Document)
WHERE d.Path = $bibtex_path OR d.Name CONTAINS $doc_name OR toString(d.UID) = $bibtex_path
MATCH (d)-[:CHUNK]->(c)
WHERE c:Text_chunk OR c:Table_chunk
RETURN c.Text AS content, c.UID AS chunk_uid, c.Name AS chunk_name, d.UID AS doc_uid, d.Name AS doc_name
ORDER BY c.Name
"""
# Extract potential document name from bibtex path
doc_name = bibtex_path.split('/')[-1] if '/' in bibtex_path else bibtex_path
results = self.session.run(query, {
"bibtex_path": bibtex_path,
"doc_name": doc_name
})
chunks = []
doc_info = None
for record in results:
if record["content"]:
chunks.append(record["content"])
if not doc_info:
doc_info = {
"doc_uid": record["doc_uid"],
"doc_name": record["doc_name"]
}
# If direct path matching didn't work, try fuzzy matching on document content paths
if not chunks:
# Extract filename without extension for broader matching
if '.' in doc_name:
base_name = doc_name.rsplit('.', 1)[0]
else:
base_name = doc_name
fuzzy_query = """
MATCH (d:Document)-[:CHUNK]->(c)
WHERE (c:Text_chunk OR c:Table_chunk)
AND (d.Name CONTAINS $base_name OR d.Path CONTAINS $base_name
OR any(path_part IN split(d.Path, '/') WHERE path_part CONTAINS $base_name))
RETURN c.Text AS content, c.UID AS chunk_uid, c.Name AS chunk_name,
d.UID AS doc_uid, d.Name AS doc_name, d.Path AS doc_path
ORDER BY d.UID, c.Name
LIMIT 100
"""
results = self.session.run(fuzzy_query, {"base_name": base_name})
# Group by document to find the best match
doc_candidates = {}
for record in results:
doc_uid = record["doc_uid"]
if doc_uid not in doc_candidates:
doc_candidates[doc_uid] = {
"chunks": [],
"doc_name": record["doc_name"],
"doc_path": record["doc_path"]
}
if record["content"]:
doc_candidates[doc_uid]["chunks"].append(record["content"])
# Pick the document with the most chunks (likely the most complete)
if doc_candidates:
best_doc = max(doc_candidates.items(), key=lambda x: len(x[1]["chunks"]))
chunks = best_doc[1]["chunks"]
doc_info = {
"doc_uid": best_doc[0],
"doc_name": best_doc[1]["doc_name"],
"doc_path": best_doc[1]["doc_path"]
}
print(f" 📄 Fuzzy matched document: {doc_info['doc_name']} with {len(chunks)} chunks")
if chunks:
# Concatenate all chunks to reconstruct the full document
full_document = "\n\n".join(chunks)
self.document_cache[cache_key] = full_document
print(f" ✅ Full document retrieved: {len(chunks)} chunks, {len(full_document)} chars")
if doc_info:
print(f" 📄 Document: {doc_info['doc_name']} (UID: {doc_info['doc_uid']})")
return full_document
else:
print(f" ⚠️ No chunks found for document path: {bibtex_path}")
except Exception as e:
print(f" ❌ Error retrieving full document from Neo4j: {e}")
return None
def get_full_document_neo4j(self, chunk_uid):
"""
Retrieve the full document from Neo4j that a chunk belongs to.
Updated to match the actual Neo4j schema from offline_docstore_multi_vice.py
Args:
chunk_uid: UID of the text/table chunk
Returns:
str: Full document text ordered by chunk sequence or None if not found
"""
cache_key = f"neo4j_{chunk_uid}"
if cache_key in self.document_cache:
return self.document_cache[cache_key]
try:
# Query based on the actual schema: Document-[:CHUNK]->Text_chunk/Table_chunk
query = """
MATCH (chunk {UID: $chunk_uid})<-[:CHUNK]-(doc:Document)
MATCH (doc)-[:CHUNK]->(all_chunks)
WHERE all_chunks:Text_chunk OR all_chunks:Table_chunk
RETURN all_chunks.Text AS content, all_chunks.UID AS chunk_uid,
all_chunks.Name AS chunk_name, doc.UID AS doc_uid, doc.Name AS doc_name
ORDER BY all_chunks.Name
"""
results = self.session.run(query, {"chunk_uid": chunk_uid})
chunks = []
doc_info = None
for record in results:
if record["content"]:
chunks.append(record["content"])
if not doc_info:
doc_info = {
"doc_uid": record["doc_uid"],
"doc_name": record["doc_name"]
}
if chunks:
# Concatenate all chunks to reconstruct the full document
full_document = "\n\n".join(chunks)
self.document_cache[cache_key] = full_document
print(f"✅ Neo4j full document retrieved: {len(chunks)} chunks, {len(full_document)} chars from {doc_info['doc_name']}")
return full_document
else:
print(f"⚠️ No Neo4j document found for chunk UID: {chunk_uid}")
except Exception as e:
print(f"❌ Error retrieving full document from Neo4j: {e}")
return None
def get_document_by_uuid(self, document_uid):
"""
Retrieve a full document from Neo4j by its document UID.
This is used for reference document retrieval from AI responses.
Args:
document_uid: UID of the document to retrieve
Returns:
dict: Document information with content, name, and metadata or None if not found
"""
cache_key = f"doc_uuid_{document_uid}"
if cache_key in self.document_cache:
return self.document_cache[cache_key]
try:
print(f"🔍 Retrieving document by UUID: {document_uid}")
# Query to get document info and all its chunks
query = """
MATCH (doc:Document {UID: $document_uid})
OPTIONAL MATCH (doc)-[:CHUNK]->(chunk)
WHERE chunk:Text_chunk OR chunk:Table_chunk
RETURN doc.Name AS doc_name, doc.Type AS doc_type, doc.File AS doc_file,
collect(chunk.Text) AS chunk_texts, collect(chunk.Name) AS chunk_names,
count(chunk) AS chunk_count
"""
result = self.session.run(query, {"document_uid": document_uid})
record = result.single()
if not record:
print(f"⚠️ No document found with UID: {document_uid}")
return None
# Extract document info
doc_info = {
"uid": document_uid,
"name": record["doc_name"],
"type": record["doc_type"],
"file": record["doc_file"],
"chunk_count": record["chunk_count"]
}
# Combine all chunk texts to reconstruct the full document
chunk_texts = [text for text in record["chunk_texts"] if text]
if chunk_texts:
full_content = "\n\n".join(chunk_texts)
doc_info["content"] = full_content
doc_info["content_length"] = len(full_content)
# Cache the result
self.document_cache[cache_key] = doc_info
print(f"✅ Document retrieved: {doc_info['name']} ({doc_info['chunk_count']} chunks, {len(full_content)} chars)")
return doc_info
else:
print(f"⚠️ Document found but no content chunks: {doc_info['name']}")
return None
except Exception as e:
print(f"❌ Error retrieving document by UUID {document_uid}: {e}")
return None
def summarize_document(self, document_text, query_context, max_tokens=1500):
"""
Summarize a full document with focus on the query context.
Args:
document_text: Full document text to summarize
query_context: The user's query to focus the summary
max_tokens: Maximum tokens for the summary
Returns:
str: Summarized document text
"""
print(f" 🤖 Using small LLM (gpt-4o-mini) for document summarization")
print(f" • Input document length: {self.count_tokens(document_text):,} tokens")
print(f" • Target summary length: {max_tokens:,} tokens")
print(f" • Query-focused: '{query_context[:100]}{'...' if len(query_context) > 100 else ''}'")
# Track usage if RAG instance is available
if self.rag_instance:
self.rag_instance.small_llm_usage["document_summarization"] += 1
self.rag_instance.small_llm_usage["total_calls"] += 1
# Check cache first
cache_key = f"{hash(document_text[:500])}_{hash(query_context)}"
if cache_key in self.summary_cache:
print(f" ✅ Using cached summary")
return self.summary_cache[cache_key]
# If document is already short enough, return as is
if self.count_tokens(document_text) <= max_tokens:
print(f" ✅ Document already under token limit, using full text")
return document_text
# Create summarization prompt
prompt = f"""
You are a document summarization expert. Please summarize the following document with particular focus on information relevant to this query: "{query_context}"
Instructions:
- Maintain all key factual information relevant to the query
- Preserve important technical details, numbers, and citations
- Keep the summary under {max_tokens} tokens
- Structure the summary logically with clear sections if applicable
- Retain any critical background information needed to understand the main points
Document to summarize:
{document_text}
Summary:
"""
try:
print(f" 🔄 Generating query-focused summary...")
response = self.summarizer.invoke(prompt)
summary = response.content.strip()
summary_tokens = self.count_tokens(summary)
print(f" ✅ Summary generated: {summary_tokens:,} tokens ({100*summary_tokens/self.count_tokens(document_text):.1f}% of original)")
# Cache the summary
self.summary_cache[cache_key] = summary
return summary
except Exception as e:
print(f" ❌ Error summarizing document: {e}")
print(f" 🔄 Fallback: truncating document to {max_tokens} tokens")
# Fallback: truncate the document
tokens = self.tokenizer.encode(document_text)
if len(tokens) > max_tokens:
truncated_tokens = tokens[:max_tokens]
return self.tokenizer.decode(truncated_tokens)
return document_text
def remove_duplicate_documents(self, documents_with_metadata, similarity_threshold=0.85):
"""
Remove duplicate documents based on content similarity.
Args:
documents_with_metadata: List of (metadata, document_text) tuples
similarity_threshold: Threshold for considering documents as duplicates
Returns:
List of unique documents
"""
if len(documents_with_metadata) <= 1:
return documents_with_metadata
unique_docs = []
seen_embeddings = []
try:
# Use OpenAI embeddings for similarity comparison
from openai import OpenAI
client = OpenAI(api_key=self.api_key)
for metadata, doc_text in documents_with_metadata:
# Generate embedding for current document
# text-embedding-3-small accepts at most 8191 input tokens
if self.count_tokens(doc_text) > 8191:
    # Use the first part of the document for embedding if too long
    embed_text = self.tokenizer.decode(self.tokenizer.encode(doc_text)[:8191])
else:
    embed_text = doc_text
response = client.embeddings.create(
model="text-embedding-3-small",
input=embed_text
)
current_embedding = np.array(response.data[0].embedding)
# Normalize embedding
current_embedding = current_embedding / np.linalg.norm(current_embedding)
# Check for similarity with existing documents
is_duplicate = False
for seen_emb in seen_embeddings:
similarity = np.dot(current_embedding, seen_emb)
if similarity >= similarity_threshold:
is_duplicate = True
break
if not is_duplicate:
unique_docs.append((metadata, doc_text))
seen_embeddings.append(current_embedding)
return unique_docs
except Exception as e:
print(f"Error in duplicate removal: {e}")
# Fallback: return all documents
return documents_with_metadata
def _get_detail_level_instructions(self, detail_level):
"""
Get detail-level specific instructions for summarization.
Args:
detail_level: Summary, Balanced, Detailed, or Comprehensive
Returns:
tuple: (instruction_text, target_multiplier, expansion_guidance)
"""
if detail_level == "Summary":
return (
"""- Focus on key points, main findings, and essential conclusions
- Use concise language while maintaining accuracy
- Prioritize the most important information relevant to the query
- Include only critical data points and major insights
- Structure information clearly but without excessive detail""",
0.7, # Use 70% of target tokens
"concise but comprehensive coverage of key points"
)
elif detail_level == "Balanced":
return (
"""- Provide a well-balanced mix of overview and detail
- Include important findings with supporting context
- Balance breadth of coverage with depth of explanation
- Include relevant data, methods, and key insights
- Provide sufficient detail for understanding without overwhelming""",
1.0, # Use 100% of target tokens
"balanced coverage with appropriate level of detail"
)
elif detail_level == "Detailed":
return (
"""- Provide comprehensive coverage with extensive detail
- Include detailed explanations of methods, procedures, and findings
- Expand on technical specifications and quantitative data
- Include background information and theoretical foundations
- Provide extensive context and implications
- Include specific examples, case studies, and applications""",
1.3, # Use 130% of target tokens (system will compress as needed)
"comprehensive and detailed coverage with extensive explanations"
)
else: # Comprehensive
return (
"""- Provide exhaustive coverage of all relevant information
- Include comprehensive explanations with maximum detail
- Cover all methodological details, technical specifications, and data
- Provide extensive background, context, and theoretical foundations
- Include all examples, case studies, applications, and implications
- Expand on every relevant concept with full explanations
- Include detailed analysis and comprehensive interpretation""",
1.5, # Use 150% of target tokens (system will compress as needed)
"exhaustive and comprehensive coverage with maximum detail"
)
def create_initial_summary(self, document_text, query_context, target_tokens=2000, detail_level="Balanced"):
"""
Create an initial comprehensive summary from the first document.
Args:
document_text: Text of the first document
query_context: The user's query for context
target_tokens: Target length for the initial summary
detail_level: Summary, Balanced, Detailed, or Comprehensive
Returns:
str: Initial comprehensive summary
"""
print(f" 🤖 Using small LLM (gpt-4o-mini) for initial comprehensive summary")
print(f" • Creating foundation summary from first document")
print(f" • Detail level: {detail_level}")
# Track usage if RAG instance is available
if self.rag_instance:
self.rag_instance.small_llm_usage["document_summarization"] += 1
self.rag_instance.small_llm_usage["total_calls"] += 1
# Get detail-level specific instructions
detail_instructions, target_multiplier, expansion_guidance = self._get_detail_level_instructions(detail_level)
adjusted_target = int(target_tokens * target_multiplier)
# If document is short enough, use as foundation
if self.count_tokens(document_text) <= adjusted_target:
print(f" ✅ Document under target length, using as foundation")
return f"**Comprehensive Summary ({detail_level} level): {query_context}**\n\n{document_text}"
prompt = f"""
You are creating the initial comprehensive summary for the user's query. This will be the foundation that gets enriched with additional documents.
User Query: "{query_context}"
Detail Level: {detail_level}
Instructions:
- Create a structured, comprehensive summary that directly addresses the user's query
- Organize information into clear sections with headers and subheadings
- Use a format that can be easily expanded with additional information
- CRITICAL: Aim for EXACTLY {adjusted_target} tokens - focus on {expansion_guidance}
- Use comprehensive markdown formatting with multiple levels of structure
Detail Level Specific Instructions:
{detail_instructions}
Document to summarize:
{document_text}
Comprehensive Summary ({detail_level} level, MUST be approximately {adjusted_target:,} tokens):
"""
try:
print(f" 🔄 Generating foundation summary...")
response = self.summarizer.invoke(prompt)
summary = response.content.strip()
summary_tokens = self.count_tokens(summary)
print(f" ✅ Foundation summary created: {summary_tokens:,} tokens")
return summary
except Exception as e:
print(f" ❌ Error creating initial summary: {e}")
# Fallback: truncate document
tokens = self.tokenizer.encode(document_text)
if len(tokens) > adjusted_target:
truncated_tokens = tokens[:adjusted_target]
return f"**Comprehensive Summary ({detail_level} level): {query_context}**\n\n" + self.tokenizer.decode(truncated_tokens)
return f"**Comprehensive Summary ({detail_level} level): {query_context}**\n\n{document_text}"
def enrich_summary(self, existing_summary, new_document, query_context, target_tokens=8000, detail_level="Balanced"):
"""
Enrich an existing summary with information from a new document.
Implements smart chunking for large documents to avoid token limit violations.
Args:
existing_summary: Current comprehensive summary
new_document: New document text to integrate
query_context: User's query for context
target_tokens: Target length for enriched summary
detail_level: Summary, Balanced, Detailed, or Comprehensive
Returns:
str: Enriched comprehensive summary
"""
print(f" 🤖 Using small LLM (gpt-4o-mini) for summary enrichment")
print(f" • Integrating new document into existing summary")
print(f" • Detail level: {detail_level}")
# Track usage if RAG instance is available
if self.rag_instance:
self.rag_instance.small_llm_usage["document_summarization"] += 1
self.rag_instance.small_llm_usage["total_calls"] += 1
# Get detail-level specific instructions
detail_instructions, target_multiplier, expansion_guidance = self._get_detail_level_instructions(detail_level)
adjusted_target = int(target_tokens * target_multiplier)
existing_tokens = self.count_tokens(existing_summary)
new_doc_tokens = self.count_tokens(new_document)
print(f" • Existing summary: {existing_tokens:,} tokens")
print(f" • New document: {new_doc_tokens:,} tokens")
print(f" • Target tokens: {adjusted_target:,}")
# Calculate available space for expansion
available_expansion = adjusted_target - existing_tokens
print(f" • Available expansion space: {available_expansion:,} tokens")
# Check if input will exceed context limits for gpt-4o-mini (128k context)
prompt_overhead = 1000 # Estimated prompt overhead
max_context_mini = 125000 # Conservative limit for gpt-4o-mini
total_input = existing_tokens + new_doc_tokens + prompt_overhead
if total_input > max_context_mini:
print(f" ⚠️ Input too large ({total_input:,} tokens), implementing document chunking...")
return self._enrich_with_chunking(existing_summary, new_document, query_context, adjusted_target, detail_level)
prompt = f"""
You are enriching a comprehensive summary with new information. Your task is to integrate the new document's relevant information into the existing summary while expanding its scope and detail.
User Query: "{query_context}"
Detail Level: {detail_level}
Instructions:
- EXPAND the existing summary by integrating relevant information from the new document
- Add new sections if the new document covers different aspects
- Enrich existing sections with additional details, data, and insights from the new document
- Avoid redundancy - don't repeat information already covered
- Maintain the structured, organized format
- Prioritize information most relevant to the user's query
- TARGET LENGTH: approximately {adjusted_target} tokens (focus on {expansion_guidance})
- Preserve all important details, facts, figures, and specific findings
- When adding information, be specific about sources and context
Detail Level Specific Instructions:
{detail_instructions}
CURRENT COMPREHENSIVE SUMMARY ({existing_tokens:,} tokens):
{existing_summary}
NEW DOCUMENT TO INTEGRATE ({new_doc_tokens:,} tokens):
{new_document}
ENRICHED COMPREHENSIVE SUMMARY ({detail_level} level, target: ~{adjusted_target:,} tokens):
"""
try:
print(f" 🔄 Enriching summary with new information...")
response = self.summarizer.invoke(prompt)
enriched_summary = response.content.strip()
enriched_tokens = self.count_tokens(enriched_summary)
compression_ratio = (existing_tokens + new_doc_tokens) / enriched_tokens if enriched_tokens > 0 else 1
print(f" ✅ Summary enriched: {enriched_tokens:,} tokens (compression: {compression_ratio:.1f}x)")
return enriched_summary
except Exception as e:
print(f" ❌ Error enriching summary: {e}")
# Fallback to chunking if regular approach fails
print(f" 🔄 Falling back to document chunking...")
return self._enrich_with_chunking(existing_summary, new_document, query_context, adjusted_target, detail_level)
def _enrich_with_chunking(self, existing_summary, new_document, query_context, target_tokens, detail_level="Balanced"):
"""
Enrich summary by processing large documents in chunks to avoid token limits.
Args:
existing_summary: Current summary
new_document: Large document to chunk and process
query_context: User's query for context
target_tokens: Target output tokens
detail_level: Summary, Balanced, Detailed, or Comprehensive
Returns:
str: Enriched summary
"""
print(f" 📊 Chunking large document for processing")
# Calculate chunk size based on available context
existing_tokens = self.count_tokens(existing_summary)
max_context = 125000
prompt_overhead = 1000
available_for_doc = max_context - existing_tokens - prompt_overhead
# Split document into manageable chunks
chunk_size = min(available_for_doc // 2, 40000) # Conservative chunk size
chunks = self._split_document_into_chunks(new_document, chunk_size)
print(f" 📄 Split document into {len(chunks)} chunks (max {chunk_size:,} tokens each)")
current_summary = existing_summary
for i, chunk in enumerate(chunks):
print(f" 🔄 Processing chunk {i+1}/{len(chunks)}")
chunk_tokens = self.count_tokens(chunk)
current_tokens = self.count_tokens(current_summary)
# Check if we've reached target
if current_tokens >= target_tokens:
print(f" 🎯 Reached target tokens ({current_tokens:,}/{target_tokens:,}), stopping chunk processing")
break
# Process this chunk
prompt = f"""
You are enriching a comprehensive summary with a chunk of new information. Focus on integrating the most relevant and important information from this chunk.
User Query: "{query_context}"
Instructions:
- EXPAND the existing summary by integrating relevant information from this document chunk
- Add new sections or enrich existing sections as appropriate
- Avoid redundancy with information already in the summary
- Prioritize information most relevant to the user's query
- TARGET LENGTH: approximately {target_tokens} tokens
- Be comprehensive but avoid unnecessary repetition
CURRENT COMPREHENSIVE SUMMARY ({current_tokens:,} tokens):
{current_summary}
DOCUMENT CHUNK TO INTEGRATE (chunk {i+1}/{len(chunks)}, {chunk_tokens:,} tokens):
{chunk}
ENRICHED COMPREHENSIVE SUMMARY (target: ~{target_tokens:,} tokens):
"""
try:
response = self.summarizer.invoke(prompt)
current_summary = response.content.strip()
new_tokens = self.count_tokens(current_summary)
print(f" ✅ Chunk processed: {new_tokens:,} tokens")
except Exception as e:
print(f" ❌ Error processing chunk {i+1}: {e}")
# Skip this chunk and continue
continue
final_tokens = self.count_tokens(current_summary)
print(f" ✅ Chunked processing complete: {final_tokens:,} tokens")
return current_summary
def _split_document_into_chunks(self, document, max_chunk_tokens):
"""
Split a document into chunks of approximately max_chunk_tokens size.
Args:
document: Text to split
max_chunk_tokens: Maximum tokens per chunk
Returns:
list: List of document chunks
"""
# Use tiktoken for accurate token counting
tokens = self.tokenizer.encode(document)
chunks = []
for i in range(0, len(tokens), max_chunk_tokens):
chunk_tokens = tokens[i:i + max_chunk_tokens]
chunk_text = self.tokenizer.decode(chunk_tokens)
chunks.append(chunk_text)
return chunks
def consolidate_with_large_llm(self, summary, additional_documents, query_context, target_tokens=8000, detail_level="Balanced"):
"""
Use the large LLM to consolidate a summary with a batch of additional documents.
This is more efficient than incremental enrichment for larger batches.
Implements dynamic batch sizing to respect token limits.
Args:
summary: Current summary to consolidate
additional_documents: List of document texts to integrate
query_context: User's query for context
target_tokens: Target length for consolidated summary
detail_level: Level of detail for the summary (Summary, Balanced, Detailed, Comprehensive)
Returns:
str: Consolidated comprehensive summary
"""
print(f" 🧠 Using large LLM for batch consolidation")
print(f" • Consolidating summary with {len(additional_documents)} documents")
# Track usage if RAG instance is available
if self.rag_instance:
self.rag_instance.large_llm_usage += 1
summary_tokens = self.count_tokens(summary)
# Check total token count and implement dynamic batching if necessary
docs_text = "\n\n---DOCUMENT SEPARATOR---\n\n".join(additional_documents)
docs_tokens = self.count_tokens(docs_text)
print(f" • Current summary: {summary_tokens:,} tokens")
print(f" • Additional documents: {docs_tokens:,} tokens")
print(f" • Target output: {target_tokens:,} tokens")
# Calculate total input tokens including prompt overhead (approximately 1000 tokens)
prompt_overhead = 1000
total_input_tokens = summary_tokens + docs_tokens + prompt_overhead
max_context_length = 125000 # Conservative limit for gpt-4o (128k context)
# If input exceeds context limit, implement dynamic batching
if total_input_tokens > max_context_length:
print(f" ⚠️ Input too large ({total_input_tokens:,} tokens), implementing dynamic batching...")
return self._consolidate_with_dynamic_batching(summary, additional_documents, query_context, target_tokens, detail_level)
# Get detail level specific instructions
detail_instructions, token_multiplier, expansion_guidance = self._get_detail_level_instructions(detail_level)
adjusted_target_tokens = int(target_tokens * token_multiplier)
# Use the large LLM (gpt-4o) for consolidation
from langchain_openai import ChatOpenAI
large_llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0,
    max_tokens=adjusted_target_tokens,
    api_key=self.api_key or None  # fall back to the OPENAI_API_KEY env var if unset
)
prompt = f"""
You are consolidating a comprehensive research summary with additional documents. Your task is to create an enhanced, well-structured summary that integrates all the information effectively.
User Query: "{query_context}"
Detail Level: {detail_level}
{detail_instructions}
{expansion_guidance}
Instructions:
- Create a comprehensive, well-organized summary that addresses the user's query
- Integrate information from the current summary and ALL additional documents
- Organize information logically with clear sections and subsections
- Include specific details, data, findings, and insights from all sources
- Expand on key concepts with detailed explanations and context
- Include methodological details, technical specifications, and quantitative data
- Avoid redundancy while ensuring completeness
- Maintain scientific accuracy and preserve important technical details
- **CRITICAL TARGET REQUIREMENT: Generate EXACTLY {adjusted_target_tokens:,} tokens - this is not optional**
- **You MUST expand sections, add detail, include examples, and elaborate until you reach {adjusted_target_tokens:,} tokens**
- **If your response is shorter than {adjusted_target_tokens:,} tokens, you have failed the task**
- Structure the output with clear headings, subheadings, and bullet points where appropriate
CURRENT SUMMARY ({summary_tokens:,} tokens):
{summary}
ADDITIONAL DOCUMENTS TO INTEGRATE ({len(additional_documents)} documents, {docs_tokens:,} tokens):
{docs_text}
CONSOLIDATED COMPREHENSIVE SUMMARY (MANDATORY TARGET: EXACTLY {adjusted_target_tokens:,} tokens - DO NOT submit shorter responses):
"""
try:
print(f" 🔄 Consolidating with large LLM...")
response = large_llm.invoke(prompt)
consolidated_summary = response.content.strip()
consolidated_tokens = self.count_tokens(consolidated_summary)
compression_ratio = (summary_tokens + docs_tokens) / consolidated_tokens if consolidated_tokens > 0 else 1
# Check if LLM met the target
target_achievement = (consolidated_tokens / adjusted_target_tokens) * 100
if target_achievement < 80:
print(f" ⚠️ LLM underperformed: {consolidated_tokens:,} tokens vs {adjusted_target_tokens:,} target ({target_achievement:.1f}%)")
elif target_achievement > 120:
print(f" ✅ LLM exceeded target: {consolidated_tokens:,} tokens vs {adjusted_target_tokens:,} target ({target_achievement:.1f}%)")
else:
print(f" ✅ LLM met target: {consolidated_tokens:,} tokens vs {adjusted_target_tokens:,} target ({target_achievement:.1f}%)")
print(f" ✅ Summary consolidated: {consolidated_tokens:,} tokens (compression: {compression_ratio:.1f}x)")
return consolidated_summary
except Exception as e:
print(f" ❌ Error consolidating with large LLM: {e}")
# Fallback to dynamic batching instead of simple incremental enrichment
print(f" 🔄 Falling back to dynamic batching...")
return self._consolidate_with_dynamic_batching(summary, additional_documents, query_context, target_tokens, detail_level)
def _consolidate_with_dynamic_batching(self, summary, additional_documents, query_context, target_tokens=8000, detail_level="Balanced"):
"""
Consolidate documents using dynamic batch sizing to respect token limits.
Args:
summary: Current summary
additional_documents: List of document texts to integrate
query_context: User's query for context
target_tokens: Target length for consolidated summary
detail_level: Level of detail for the summary (Summary, Balanced, Detailed, Comprehensive)
Returns:
str: Consolidated summary
"""
print(f" 📊 Dynamic batching: Processing {len(additional_documents)} documents in token-aware batches")
current_summary = summary
summary_tokens = self.count_tokens(summary)
i = 0
batch_num = 0
while i < len(additional_documents):
# Determine optimal batch size based on current token counts
batch_size = self._calculate_optimal_batch_size(
current_summary, additional_documents[i:], target_tokens
)
# Process the batch
batch_end = min(i + batch_size, len(additional_documents))
batch = additional_documents[i:batch_end]
print(f" 🔄 Dynamic batch {(i//batch_size)+1}: processing {len(batch)} documents (docs {i+1}-{batch_end})")
if len(batch) == 1:
# Single document: use enrichment instead of large LLM
current_summary = self.enrich_summary(current_summary, batch[0], query_context, target_tokens, detail_level)
else:
# Multiple documents: try large LLM with current batch
try:
batch_text = "\n\n---DOCUMENT SEPARATOR---\n\n".join(batch)
batch_tokens = self.count_tokens(batch_text)
current_tokens = self.count_tokens(current_summary)
# Check if this batch will fit
total_input = current_tokens + batch_tokens + 1000 # 1000 for prompt overhead
if total_input > 125000:
print(f" ⚠️ Batch still too large ({total_input:,} tokens), processing individually...")
# Process each document individually
for doc in batch:
current_summary = self.enrich_summary(current_summary, doc, query_context, target_tokens, detail_level)
else:
# Use large LLM for batch
current_summary = self._consolidate_batch_with_large_llm(
current_summary, batch, query_context, target_tokens, detail_level
)
except Exception as e:
print(f" ❌ Error in dynamic batch processing: {e}")
# Fallback to individual processing
for doc in batch:
current_summary = self.enrich_summary(current_summary, doc, query_context, target_tokens, detail_level)
i = batch_end
# Check if we've reached target token size
current_tokens = self.count_tokens(current_summary)
if current_tokens >= target_tokens:
print(f" 🎯 Reached target tokens ({current_tokens:,}/{target_tokens:,}), stopping dynamic batching")
break
return current_summary
def _calculate_optimal_batch_size(self, current_summary, remaining_documents, target_tokens):
"""
Calculate optimal batch size based on token limits and current state.
Args:
current_summary: Current summary text
remaining_documents: List of remaining documents to process
target_tokens: Target output tokens
Returns:
int: Optimal batch size
"""
if not remaining_documents:
return 0
summary_tokens = self.count_tokens(current_summary)
max_context = 125000 # Conservative limit
prompt_overhead = 1000
available_tokens = max_context - summary_tokens - prompt_overhead
# Start with batch size of 1 and increase until we hit limits
batch_size = 1
cumulative_tokens = 0
for i, doc in enumerate(remaining_documents):
doc_tokens = self.count_tokens(doc)
if cumulative_tokens + doc_tokens > available_tokens:
break
cumulative_tokens += doc_tokens
batch_size = i + 1
# Cap at reasonable batch size to avoid extremely long processing
if batch_size >= 10:
break
return max(1, batch_size) # Ensure at least 1 document
def _consolidate_batch_with_large_llm(self, summary, batch_documents, query_context, target_tokens, detail_level="Balanced"):
"""
Consolidate a specific batch with the large LLM.
"""
# Get detail level specific instructions
detail_instructions, token_multiplier, expansion_guidance = self._get_detail_level_instructions(detail_level)
adjusted_target_tokens = int(target_tokens * token_multiplier)
from langchain_openai import ChatOpenAI
large_llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0,
    max_tokens=adjusted_target_tokens,
    api_key=self.api_key or None  # fall back to the OPENAI_API_KEY env var if unset
)
docs_text = "\n\n---DOCUMENT SEPARATOR---\n\n".join(batch_documents)
prompt = f"""
You are consolidating a comprehensive research summary with a batch of additional documents. Integrate all information effectively.
User Query: "{query_context}"
Detail Level: {detail_level}
{detail_instructions}
{expansion_guidance}
Instructions:
- Enhance the existing summary with information from ALL batch documents
- Organize information logically with clear sections and subsections
- Include specific details, data, findings, and insights from all sources
- Avoid redundancy while ensuring completeness
- IMPORTANT: Aim for approximately {adjusted_target_tokens} tokens - be comprehensive and detailed
- Structure with clear headings and formatting
CURRENT SUMMARY:
{summary}
BATCH DOCUMENTS TO INTEGRATE:
{docs_text}
ENHANCED COMPREHENSIVE SUMMARY:
"""
response = large_llm.invoke(prompt)
return response.content.strip()
def consolidate_with_large_llm_and_citations(self, summary, additional_documents, document_metadata, query_context, target_tokens=8000, start_block_num=1, use_inline_citations=True):
"""
Use the large LLM to consolidate a summary with a batch of additional documents, with optional citations.
Args:
summary: Current summary to consolidate
additional_documents: List of document texts to integrate
document_metadata: List of metadata for each document
query_context: User's query for context
target_tokens: Target length for consolidated summary
start_block_num: Starting block number for citations
use_inline_citations: Whether to include inline citations (False for extensive search mode)
Returns:
str: Consolidated comprehensive summary with or without inline citations
"""
print(f" 🧠 Using large LLM for batch consolidation {'with citations' if use_inline_citations else 'without citations'}")
print(f" • Consolidating summary with {len(additional_documents)} documents")
# Track usage if RAG instance is available
if self.rag_instance:
self.rag_instance.large_llm_usage += 1
summary_tokens = self.count_tokens(summary) if summary else 0
# Prepare documents - with or without block numbers for citation
if use_inline_citations:
numbered_documents = []
block_nums = []
for i, (doc, metadata) in enumerate(zip(additional_documents, document_metadata)):
# Calculate the actual block number that will be used for this document
# The first block is the comprehensive summary, then individual sources follow
block_num = start_block_num + 1 + i # +1 because comprehensive summary takes the first block
block_nums.append(block_num)
title = metadata.get('title', f"Document {i+1}")
numbered_doc = f"**[block {block_num}] {title}**\n{doc}"
numbered_documents.append(numbered_doc)
else:
# For extensive search mode: simple document preparation without block numbers
numbered_documents = []
block_nums = []
for i, (doc, metadata) in enumerate(zip(additional_documents, document_metadata)):
title = metadata.get('title', f"Document {i+1}")
numbered_doc = f"**{title}**\n{doc}"
numbered_documents.append(numbered_doc)
docs_text = "\n\n---DOCUMENT SEPARATOR---\n\n".join(numbered_documents)
docs_tokens = self.count_tokens(docs_text)
print(f" • Current summary: {summary_tokens:,} tokens")
print(f" • Additional documents: {docs_tokens:,} tokens")
print(f" • Target output: {target_tokens:,} tokens")
if use_inline_citations:
print(f" • Block numbers: {block_nums}")
# Use the large LLM (gpt-4o) for consolidation
from langchain_openai import ChatOpenAI
large_llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0,
    max_tokens=target_tokens,
    api_key=self.api_key or None  # fall back to the OPENAI_API_KEY env var if unset
)
current_summary_part = f"\n\nCURRENT SUMMARY ({summary_tokens:,} tokens):\n{summary}" if summary else ""
if use_inline_citations:
# Standard mode with inline citations
prompt = f"""
You are consolidating a comprehensive research summary with additional documents. Your task is to create an enhanced, well-structured summary that integrates all the information effectively WITH PROPER INLINE CITATIONS.
User Query: "{query_context}"
CRITICAL CITATION INSTRUCTIONS:
- You MUST include inline citations using the block numbers provided: [block X]
- When you reference information from a document, immediately cite it: [block X]
- Multiple sources for the same point: [block X, block Y]
- Every factual claim, data point, or specific finding MUST be cited
- Citations should be placed at the end of sentences or claims they support
CONTENT INSTRUCTIONS:
- Create a comprehensive, well-organized summary that addresses the user's query
- Integrate information from the current summary and ALL additional documents
- Organize information logically with clear sections and subsections
- Include specific details, data, findings, and insights from all sources
- Expand on key concepts with detailed explanations and context
- Include methodological details, technical specifications, and quantitative data
- Avoid redundancy while ensuring completeness
- Maintain scientific accuracy and preserve important technical details
- IMPORTANT: Aim for EXACTLY {target_tokens} tokens - be comprehensive and detailed to reach this target
- Structure the output with clear headings, subheadings, and bullet points where appropriate{current_summary_part}
ADDITIONAL DOCUMENTS TO INTEGRATE ({len(additional_documents)} documents, {docs_tokens:,} tokens):
{docs_text}
CONSOLIDATED COMPREHENSIVE SUMMARY WITH CITATIONS (MUST be approximately {target_tokens:,} tokens):
"""
else:
# Extensive search mode: Generate citations for tracking but will strip them later
prompt = f"""
You are consolidating a comprehensive research summary with additional documents. Your task is to create an enhanced, well-structured summary that integrates all the information effectively WITH PROPER INLINE CITATIONS for source tracking.
User Query: "{query_context}"
CRITICAL CITATION INSTRUCTIONS:
- You MUST include inline citations using the block numbers provided: [block X]
- When you reference information from a document, immediately cite it: [block X]
- Multiple sources for the same point: [block X, block Y]
- Every factual claim, data point, or specific finding MUST be cited
- Citations should be placed at the end of sentences or claims they support
- These citations will be used for source tracking and then processed for final formatting
CONTENT INSTRUCTIONS:
- Create a comprehensive, well-organized summary that addresses the user's query
- Integrate information from the current summary and ALL additional documents
- Organize information logically with clear sections and subsections
- Include specific details, data, findings, and insights from all sources
- Expand on key concepts with detailed explanations and context
- Include methodological details, technical specifications, and quantitative data
- Avoid redundancy while ensuring completeness
- Maintain scientific accuracy and preserve important technical details
- IMPORTANT: Aim for EXACTLY {target_tokens} tokens - be comprehensive and detailed to reach this target
- Structure the output with clear headings, subheadings, and bullet points where appropriate{current_summary_part}
ADDITIONAL DOCUMENTS TO INTEGRATE ({len(additional_documents)} documents, {docs_tokens:,} tokens):
{docs_text}
CONSOLIDATED COMPREHENSIVE SUMMARY WITH CITATIONS (MUST be approximately {target_tokens:,} tokens):
"""
try:
print(f" 🔄 Consolidating with large LLM {'and citations' if use_inline_citations else 'with citations for tracking (will be processed)'}...")
response = large_llm.invoke(prompt)
consolidated_summary = response.content.strip()
# In extensive search mode: extract citations and clean text
if not use_inline_citations:
print(f" 🔧 Processing citations for extensive search mode...")
# Extract all citations from the text for source tracking.
# The pattern also matches grouped citations like "[block 3, block 5]",
# which the prompt explicitly asks the model to produce.
import re
citation_pattern = r'\[block \d+(?:,\s*block \d+)*\]'
citations_found = re.findall(citation_pattern, consolidated_summary)
unique_citations = list(set(citations_found))
print(f" 📋 Citations found for source tracking: {len(unique_citations)} unique citations")
# Strip all citations from the text for clean reading
clean_summary = re.sub(citation_pattern, '', consolidated_summary)
# Collapse runs of spaces and tabs left by citation removal,
# without touching newlines that carry the markdown structure
clean_summary = re.sub(r'[ \t]{2,}', ' ', clean_summary)
# Clean up spacing around punctuation
clean_summary = re.sub(r'[ \t]+([.,;:])', r'\1', clean_summary)
consolidated_summary = clean_summary.strip()
print(f" ✅ Citations extracted and text cleaned for extensive search mode")
# Store citation info for reference building (if needed by calling code)
if hasattr(response, 'citations_extracted'):
response.citations_extracted = unique_citations
else:
# Add citation info as attribute for later use
consolidated_summary = consolidated_summary + f"\n\n<!-- CITATIONS_EXTRACTED: {','.join(unique_citations)} -->"
consolidated_tokens = self.count_tokens(consolidated_summary)
compression_ratio = (summary_tokens + docs_tokens) / consolidated_tokens if consolidated_tokens > 0 else 1
print(f" ✅ Summary consolidated {'with citations' if use_inline_citations else 'and cleaned'}: {consolidated_tokens:,} tokens (compression: {compression_ratio:.1f}x)")
return consolidated_summary
except Exception as e:
print(f" ❌ Error consolidating with large LLM: {e}")
# Fallback to simple concatenation
if use_inline_citations:
# Fallback with basic citations
if summary:
fallback = f"{summary}\n\n**Additional Information:**\n"
else:
fallback = "**Comprehensive Summary:**\n\n"
for i, doc in enumerate(additional_documents):
# Match the numbering used above: the comprehensive summary occupies the first block
block_num = start_block_num + 1 + i
title = document_metadata[i].get('title', f'Document {i+1}')
fallback += f"\n**From {title} [block {block_num}]:**\n{doc[:500]}{'...' if len(doc) > 500 else ''}\n"
else:
# Fallback without citations (extensive search mode)
if summary:
fallback = f"{summary}\n\n**Additional Information:**\n"
else:
fallback = "**Comprehensive Summary:**\n\n"
for i, doc in enumerate(additional_documents):
title = document_metadata[i].get('title', f'Document {i+1}')
fallback += f"\n**From {title}:**\n{doc[:500]}{'...' if len(doc) > 500 else ''}\n"
return fallback
def process_documents_with_source_tracking(self, documents, query_context, target_summary_tokens=8000, batch_size=10, use_inline_citations=False, disable_citations=False, detail_level="Balanced"):
"""
Process documents with improved source tracking and batch processing using large LLM.
Args:
documents: List of (metadata, content) tuples
query_context: User's query for context
target_summary_tokens: Target length for final summary
batch_size: Number of documents to process in each batch with large LLM
use_inline_citations: Whether to include inline citations (False for extensive search mode)
disable_citations: Whether to completely disable citation logic (True for extensive search mode)
detail_level: Level of detail for the summary (Summary, Balanced, Detailed, Comprehensive)
Returns:
tuple: (comprehensive_summary, source_mapping, individual_summaries)
"""
print(f" 📚 Processing {len(documents)} documents with source tracking")
print(f" • Batch size: {batch_size} documents per large LLM call")
print(f" • Target summary tokens: {target_summary_tokens:,}")
if disable_citations:
print(f" • Citations: Completely disabled (extensive search mode)")
else:
print(f" • Inline citations: {'Enabled' if use_inline_citations else 'Disabled (extensive search mode)'}")
source_mapping = {} # Maps content sections to source documents
individual_summaries = [] # List of individual document summaries
comprehensive_summary = ""
doc_batch = []
doc_metadata_batch = []
documents_processed = 0
for i, (metadata, content) in enumerate(documents):
documents_processed += 1
doc_batch.append(content)
doc_metadata_batch.append(metadata)
# Store individual document info for source tracking
doc_id = f"doc_{i+1}"
individual_summaries.append({
'id': doc_id,
'metadata': metadata,
'content_preview': content[:200] + "..." if len(content) > 200 else content
})
# Process batch when we reach batch_size or end of documents
if len(doc_batch) >= batch_size or i == len(documents) - 1:
print(f" 🔄 Processing batch {(i//batch_size)+1}: documents {i+2-len(doc_batch)} to {i+1}")
if comprehensive_summary == "":
# First batch: create initial comprehensive summary with generous allocation
if len(doc_batch) == 1:
# Single document: use create_initial_summary with generous initial size
# For single docs, use 90% of target, minimum 1500
initial_target = max(1500, int(target_summary_tokens * 0.9))
print(f" • Single document target: {initial_target:,} tokens (from user target: {target_summary_tokens:,})")
comprehensive_summary = self.create_initial_summary(
doc_batch[0],
query_context,
target_tokens=initial_target,
detail_level=detail_level
)
else:
# Multiple documents: use large LLM for batch processing with generous initial target
# Make the algorithm more responsive to user's target token request
# For targets <= 6000: use 80% of target, minimum 2000
# For targets > 6000: use 70% of target, minimum 3000
if target_summary_tokens <= 6000:
initial_target = max(2000, int(target_summary_tokens * 0.8))
else:
initial_target = max(3000, int(target_summary_tokens * 0.7))
print(f" • Target output: {initial_target:,} tokens (from user target: {target_summary_tokens:,})")
if disable_citations:
# Use simple consolidation without citations
comprehensive_summary = self.consolidate_with_large_llm(
"",
doc_batch,
query_context,
target_tokens=initial_target,
detail_level=detail_level
)
else:
# Use citation-enabled consolidation
comprehensive_summary = self.consolidate_with_large_llm_and_citations(
"",
doc_batch,
doc_metadata_batch,
query_context,
target_tokens=initial_target,
start_block_num=self.rag_instance.block_counter if self.rag_instance else 1,
use_inline_citations=use_inline_citations
)
else:
# Subsequent batches: consolidate with existing summary
if disable_citations:
# Use simple consolidation without citations
comprehensive_summary = self.consolidate_with_large_llm(
comprehensive_summary,
doc_batch,
query_context,
target_tokens=target_summary_tokens,
detail_level=detail_level
)
else:
# Use citation-enabled consolidation
comprehensive_summary = self.consolidate_with_large_llm_and_citations(
comprehensive_summary,
doc_batch,
doc_metadata_batch,
query_context,
target_tokens=target_summary_tokens,
start_block_num=self.rag_instance.block_counter if self.rag_instance else 1,
use_inline_citations=use_inline_citations
)
# Track which documents contributed to current summary section
batch_start = i + 2 - len(doc_batch)
batch_end = i + 1
source_mapping[f"batch_{(i//batch_size)+1}"] = {
'documents': list(range(batch_start, batch_end + 1)),
'metadata': doc_metadata_batch.copy()
}
# Clear batch for next iteration
doc_batch = []
doc_metadata_batch = []
print(f" ✅ Batch processed: {self.count_tokens(comprehensive_summary):,} tokens")
# More aggressive termination logic to reach target size
current_tokens = self.count_tokens(comprehensive_summary)
# Only stop if we've truly exceeded target by a reasonable margin OR processed all documents
if current_tokens >= target_summary_tokens * 1.1: # 110% of target
print(f" 🎯 Significantly exceeded target summary size ({current_tokens:,} > {target_summary_tokens * 1.1:,.0f}) with {documents_processed} documents")
break
elif documents_processed >= len(documents):
print(f" 📋 All documents processed ({documents_processed}/{len(documents)}) - final size: {current_tokens:,} tokens")
break
elif current_tokens >= target_summary_tokens * 0.98 and documents_processed >= len(documents) * 0.8:
print(f" 🎯 Near target size ({current_tokens:,}/{target_summary_tokens:,}) and processed most documents ({documents_processed}/{len(documents)})")
break
print(f" ✅ Document processing complete:")
print(f" • Documents processed: {documents_processed}/{len(documents)}")
print(f" • Final summary: {self.count_tokens(comprehensive_summary):,} tokens")
print(f" • Source batches: {len(source_mapping)}")
return comprehensive_summary, source_mapping, individual_summaries
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| session | Neo4j session | - | positional |
| chroma_client | ChromaDB client | - | positional |
| api_key | str | "" | keyword |
| rag_instance | object or None | None | keyword |
Parameter Details
session: Active Neo4j session used to run document and chunk queries.
chroma_client: ChromaDB client for the collections being searched.
api_key: OpenAI API key used by the summarizer and embedding calls.
rag_instance: Optional reference to the main RAG instance, used for usage tracking.
Return Value
Instantiation returns an ExtensiveSearchManager instance.
Class Interface
Methods
__init__(self, session, chroma_client, api_key="", rag_instance=None)
Purpose: Initialize the manager with its database connections, set up the gpt-4o-mini summarizer and cl100k_base tokenizer, and create the document and summary caches.
Parameters:
session: Active Neo4j session used for document and chunk queries
chroma_client: ChromaDB client for the collections being searched
api_key: OpenAI API key passed to the summarizer and embedding calls
rag_instance: Optional reference to the main RAG instance for usage tracking
Returns: None
count_tokens(self, text)
Purpose: Count tokens in text using the cl100k_base tiktoken encoding.
Parameters:
text: Text to tokenize and count
Returns: int: Number of tokens in the text
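A minimal sketch of the token counting used throughout this class (assumes the tiktoken package is installed):

import tiktoken

# cl100k_base is the encoding the class's tokenizer is built on
tokenizer = tiktoken.get_encoding("cl100k_base")

def count_tokens(text):
    # Mirrors ExtensiveSearchManager.count_tokens
    return len(tokenizer.encode(text))

print(count_tokens("Extensive search with full document retrieval"))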
get_full_document(self, chunk_metadata, collection_name)
Purpose: Retrieve the full document a chunk belongs to by querying Neo4j for all chunks from the same document and reconstructing the complete text, with a fuzzy path-matching fallback when direct lookup fails.
Parameters:
chunk_metadata: Metadata from the chunk containing the bibtex path or document info
collection_name: Name of the ChromaDB collection (used in the cache key)
Returns: str: Full document text ordered by chunk sequence, or None if not found
get_full_document_neo4j(self, chunk_uid)
Purpose: Retrieve the full document from Neo4j that a chunk belongs to, following the Document-[:CHUNK]->Text_chunk/Table_chunk schema from offline_docstore_multi_vice.py.
Parameters:
chunk_uid: UID of the text or table chunk
Returns: str: Full document text ordered by chunk sequence, or None if not found
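For illustration, a minimal sketch of the chunk-to-document reconstruction this method performs (the connection details are hypothetical; the schema names follow the class's own query):

from neo4j import GraphDatabase

driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))  # hypothetical
query = """
MATCH (chunk {UID: $chunk_uid})<-[:CHUNK]-(doc:Document)
MATCH (doc)-[:CHUNK]->(all_chunks)
WHERE all_chunks:Text_chunk OR all_chunks:Table_chunk
RETURN all_chunks.Text AS content
ORDER BY all_chunks.Name
"""
with driver.session() as session:
    records = session.run(query, {"chunk_uid": "some-chunk-uid"})
    # Reconstruct the full document the same way the class does: join chunk texts
    full_document = "\n\n".join(r["content"] for r in records if r["content"])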
get_document_by_uuid(self, document_uid)
Purpose: Retrieve a full document from Neo4j by its document UID; used for reference document retrieval from AI responses.
Parameters:
document_uid: UID of the document to retrieve
Returns: dict: Document information with content, name, and metadata, or None if not found
summarize_document(self, document_text, query_context, max_tokens=1500)
Purpose: Summarize a full document with focus on the query context; results are cached, and documents already under the token limit are returned unchanged.
Parameters:
document_text: Full document text to summarize
query_context: The user's query used to focus the summary
max_tokens: Maximum tokens for the summary
Returns: str: Summarized document text
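A typical call, assuming manager is an instance and full_document came from one of the retrieval methods above (the query string is a placeholder):

focused_summary = manager.summarize_document(
    document_text=full_document,
    query_context="efficacy results for the lead compound",
    max_tokens=1500,
)
print(manager.count_tokens(focused_summary), "tokens")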
remove_duplicate_documents(self, documents_with_metadata, similarity_threshold=0.85)
Purpose: Remove duplicate documents based on embedding cosine similarity of their content.
Parameters:
documents_with_metadata: List of (metadata, document_text) tuples
similarity_threshold: Cosine similarity at or above which two documents count as duplicates
Returns: list: Unique (metadata, document_text) tuples
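The deduplication reduces to greedy cosine-similarity filtering over normalized embeddings. A self-contained sketch of that core step, with toy vectors standing in for the OpenAI embeddings:

import numpy as np

def dedupe_by_embedding(items, embeddings, threshold=0.85):
    # Keep an item only if it is not too similar to anything already kept
    unique, kept_vecs = [], []
    for item, vec in zip(items, embeddings):
        vec = vec / np.linalg.norm(vec)  # normalize so the dot product is cosine similarity
        if all(np.dot(vec, seen) < threshold for seen in kept_vecs):
            unique.append(item)
            kept_vecs.append(vec)
    return unique

docs = ["doc A", "doc A (near copy)", "doc B"]
vecs = [np.array([1.0, 0.0]), np.array([0.99, 0.1]), np.array([0.0, 1.0])]
print(dedupe_by_embedding(docs, vecs))  # ['doc A', 'doc B']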
_get_detail_level_instructions(self, detail_level)
Purpose: Get detail-level specific instructions for summarization.
Parameters:
detail_level: Summary, Balanced, Detailed, or Comprehensive
Returns: tuple: (instruction_text, target_multiplier, expansion_guidance)
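The returned multiplier scales the caller's token target before prompting. A short sketch of how the tuple is consumed, with the multipliers taken from the source above:

multipliers = {"Summary": 0.7, "Balanced": 1.0, "Detailed": 1.3, "Comprehensive": 1.5}
target_tokens = 8000
for level, m in multipliers.items():
    # Matches adjusted_target = int(target_tokens * target_multiplier) in the class
    print(level, int(target_tokens * m))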
create_initial_summary(self, document_text, query_context, target_tokens=2000, detail_level="Balanced")
Purpose: Create an initial comprehensive summary from the first document; this becomes the foundation that later documents enrich.
Parameters:
document_text: Text of the first document
query_context: The user's query for context
target_tokens: Target length for the initial summary
detail_level: Summary, Balanced, Detailed, or Comprehensive
Returns: str: Initial comprehensive summary
enrich_summary(self, existing_summary, new_document, query_context, target_tokens=8000, detail_level="Balanced")
Purpose: Enrich an existing summary with information from a new document, switching to smart chunking when the combined input would exceed the model's context limit.
Parameters:
existing_summary: Current comprehensive summary
new_document: New document text to integrate
query_context: User's query for context
target_tokens: Target length for the enriched summary
detail_level: Summary, Balanced, Detailed, or Comprehensive
Returns: str: Enriched comprehensive summary
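Enrichment is designed to be called in a loop, one document at a time, growing the summary toward the target. A sketch of that driving loop (manager, documents, and query are placeholders):

summary = manager.create_initial_summary(documents[0], query, target_tokens=2000)
for doc in documents[1:]:
    summary = manager.enrich_summary(summary, doc, query, target_tokens=8000)
    if manager.count_tokens(summary) >= 8000:
        break  # target reached; mirrors the early-stop checks in the class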
_enrich_with_chunking(self, existing_summary, new_document, query_context, target_tokens, detail_level="Balanced")
Purpose: Enrich a summary by processing a large document in chunks so each LLM call stays within token limits.
Parameters:
existing_summary: Current summary
new_document: Large document to chunk and process
query_context: User's query for context
target_tokens: Target output tokens
detail_level: Summary, Balanced, Detailed, or Comprehensive
Returns: str: Enriched summary
_split_document_into_chunks(self, document, max_chunk_tokens)
Purpose: Split a document into chunks of approximately max_chunk_tokens size using tiktoken for accurate counting.
Parameters:
document: Text to split
max_chunk_tokens: Maximum tokens per chunk
Returns: list: Document chunks
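A standalone version of the token-window splitting (assumes tiktoken):

import tiktoken

tokenizer = tiktoken.get_encoding("cl100k_base")

def split_into_chunks(document, max_chunk_tokens):
    # Encode once, then slice the token stream into fixed-size windows
    tokens = tokenizer.encode(document)
    return [
        tokenizer.decode(tokens[i:i + max_chunk_tokens])
        for i in range(0, len(tokens), max_chunk_tokens)
    ]

chunks = split_into_chunks("word " * 100000, max_chunk_tokens=40000)
print(len(chunks), "chunks")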
consolidate_with_large_llm(self, summary, additional_documents, query_context, target_tokens=8000, detail_level="Balanced")
Purpose: Use the large LLM (gpt-4o) to consolidate a summary with a batch of additional documents; more efficient than incremental enrichment for larger batches, with dynamic batching when the input exceeds the context limit.
Parameters:
summary: Current summary to consolidate
additional_documents: List of document texts to integrate
query_context: User's query for context
target_tokens: Target length for the consolidated summary
detail_level: Summary, Balanced, Detailed, or Comprehensive
Returns: str: Consolidated comprehensive summary
_consolidate_with_dynamic_batching(self, summary, additional_documents, query_context, target_tokens=8000, detail_level="Balanced")
Purpose: Consolidate documents using dynamic, token-aware batch sizing to respect context limits.
Parameters:
summary: Current summary
additional_documents: List of document texts to integrate
query_context: User's query for context
target_tokens: Target length for the consolidated summary
detail_level: Summary, Balanced, Detailed, or Comprehensive
Returns: str: Consolidated summary
_calculate_optimal_batch_size(self, current_summary, remaining_documents, target_tokens)
Purpose: Calculate the optimal batch size based on token limits and the current summary size.
Parameters:
current_summary: Current summary text
remaining_documents: List of remaining documents to process
target_tokens: Target output tokens
Returns: int: Optimal batch size (at least 1, capped at 10)
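The sizing is a greedy fill against the remaining context budget. The same logic in isolation, with precomputed token counts standing in for count_tokens calls:

def optimal_batch_size(summary_tokens, doc_token_counts,
                       max_context=125000, prompt_overhead=1000, cap=10):
    # Greedily add documents until the context budget or the cap is hit
    available = max_context - summary_tokens - prompt_overhead
    batch_size, cumulative = 1, 0
    for i, doc_tokens in enumerate(doc_token_counts):
        if cumulative + doc_tokens > available:
            break
        cumulative += doc_tokens
        batch_size = i + 1
        if batch_size >= cap:
            break
    return max(1, batch_size)

print(optimal_batch_size(8000, [30000, 30000, 30000, 40000]))  # 3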
_consolidate_batch_with_large_llm(self, summary, batch_documents, query_context, target_tokens, detail_level="Balanced")
Purpose: Consolidate a specific batch of documents into the summary with the large LLM.
Parameters:
summary: Current summary
batch_documents: Documents in the current batch
query_context: User's query for context
target_tokens: Target output tokens
detail_level: Summary, Balanced, Detailed, or Comprehensive
Returns: str: Enhanced comprehensive summary
consolidate_with_large_llm_and_citations(self, summary, additional_documents, document_metadata, query_context, target_tokens=8000, start_block_num=1, use_inline_citations=True)
Purpose: Use the large LLM to consolidate a summary with a batch of additional documents, with optional [block N] citations; in extensive search mode the citations are extracted for source tracking and then stripped from the text.
Parameters:
summary: Current summary to consolidate
additional_documents: List of document texts to integrate
document_metadata: List of metadata dicts, one per document
query_context: User's query for context
target_tokens: Target length for the consolidated summary
start_block_num: Starting block number for citations
use_inline_citations: Whether to keep inline citations (False for extensive search mode)
Returns: str: Consolidated comprehensive summary, with or without inline citations
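In extensive search mode the model is still asked for [block N] citations, which are then recorded and stripped. The post-processing reduces to two regex passes; the pattern below also catches grouped citations like [block 3, block 5]:

import re

text = "Method X improves recall [block 2]. It scales well [block 3, block 5]."
pattern = r'\[block \d+(?:,\s*block \d+)*\]'
citations = sorted(set(re.findall(pattern, text)))
clean = re.sub(r'[ \t]+([.,;:])', r'\1', re.sub(pattern, '', text))
print(citations)  # ['[block 2]', '[block 3, block 5]']
print(clean)      # Method X improves recall. It scales well.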
process_documents_with_source_tracking(self, documents, query_context, target_summary_tokens=8000, batch_size=10, use_inline_citations=False, disable_citations=False, detail_level="Balanced")
Purpose: Process documents in batches with the large LLM while tracking which source documents contributed to each summary section; stops early once the summary reaches the target size.
Parameters:
documents: List of (metadata, content) tuples
query_context: User's query for context
target_summary_tokens: Target length for the final summary
batch_size: Number of documents per large LLM call
use_inline_citations: Whether to include inline citations (False for extensive search mode)
disable_citations: Whether to bypass citation logic entirely (True for extensive search mode)
detail_level: Summary, Balanced, Detailed, or Comprehensive
Returns: tuple: (comprehensive_summary, source_mapping, individual_summaries)
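A hedged end-to-end sketch of driving the batch pipeline (the session, client, key, and documents are placeholders):

docs = [({"title": "Paper A"}, "full text of paper A..."),
        ({"title": "Paper B"}, "full text of paper B...")]

manager = ExtensiveSearchManager(session, chroma_client, api_key="sk-...")
summary, source_mapping, individual = manager.process_documents_with_source_tracking(
    docs,
    query_context="What methods improve retrieval recall?",
    target_summary_tokens=8000,
    batch_size=10,
    disable_citations=True,   # extensive search mode: skip citation plumbing
    detail_level="Balanced",
)
print(manager.count_tokens(summary), "token summary from", len(individual), "documents")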
Required Imports
import re
import numpy as np
import tiktoken
from openai import OpenAI
from langchain_openai import ChatOpenAI
Usage Example
# Example usage (session and chroma_client are placeholders for live connections):
# manager = ExtensiveSearchManager(session, chroma_client, api_key=os.environ["OPENAI_API_KEY"])
# full_doc = manager.get_full_document_neo4j(chunk_uid)
# summary = manager.summarize_document(full_doc, "key findings on retrieval quality")
Similar Components
AI-powered semantic similarity - components with related functionality:
- class ExtensiveSearchManager (99.0% similar)
- function extensive_mode_example (53.0% similar)
- class ReferenceManager_v4 (51.5% similar)
- class ReferenceManager_v3 (51.4% similar)
- class ReferenceManager_v2 (50.6% similar)