function api_send_chat_message_v1
Flask API endpoint that handles sending messages in a chat session, processes them through a RAG (Retrieval-Augmented Generation) engine with configurable LLM models, and returns AI-generated responses with references.
/tf/active/vicechatdev/vice_ai/new_app.py
1893 - 2199
complex
Purpose
This endpoint serves as the main chat interface for a document-based conversational AI system. It retrieves chat session context, validates document and section existence, configures the RAG engine with user-specified settings (model selection, web search, memory, collections), processes user messages with document context, and generates AI responses. Supports multiple LLM providers (OpenAI, Anthropic Claude, Azure OpenAI, Google Gemini), multi-cycle web searches with background processing, and reference extraction from responses.
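The model dispatch described above keys on the model-name prefix alone. The following sketch distills that mapping from the source below into a standalone helper (the function name is illustrative; the endpoint inlines this logic):
```python
def resolve_flow_control_model(model: str):
    """Map a model-name prefix to the flow_control entry the RAG engine expects."""
    if model.startswith('claude'):
        return ["Anthropic", model, 0]
    if model.startswith('azure') or model == 'OneCo-gpt':
        # Azure always pins the OneCo-gpt deployment and a fixed API version
        return ["Azure", "OneCo-gpt", 0, "2024-02-15-preview"]
    if model.startswith('gpt') or model.startswith('o1'):
        return ["OpenAi", model, 0]
    if model.startswith('gemini'):
        return ["Google", model, 0]
    # Unknown prefixes fall back to OpenAI with the model string as-is
    return ["OpenAi", model, 0]
```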
Source Code
```python
def api_send_chat_message(session_id):
    """Send a message in a chat session"""
    try:
        chat_session = chat_session_service.get_chat_session(session_id)
        if not chat_session:
            return jsonify({'error': 'Chat session not found'}), 404

        # Get the document and section for context
        document = document_service.get_document(chat_session.document_id)
        if not document:
            return jsonify({'error': 'Document not found'}), 404

        section = text_section_service.get_text_section(chat_session.section_id)
        if not section:
            return jsonify({'error': 'Section not found'}), 404

        data = request.get_json()
        message = data.get('message', '').strip()
        context_documents = data.get('context_documents', [])
        config = data.get('config', {})
        model = data.get('model', LLM_CONFIG['default_model'])  # Get model selection

        if not message:
            return jsonify({'error': 'Message cannot be empty'}), 400

        # Add user message to session
        chat_session.add_message('user', message)
        chat_session.context_documents = context_documents
        chat_session_service.add_message_to_session(session_id, 'user', message)

        # Get section content for context
        if section and section.content:
            section_context = f"Current section content:\n\n{section.content}\n\n"
        else:
            section_context = ""

        # Check if RAG engine is available
        if not RAG_AVAILABLE:
            return jsonify({'error': 'RAG engine not available'}), 500

        # Configure RAG engine model - pass model string directly like SmartStat does
        logger.info(f"Chat: Received model selection: '{model}'")
        if hasattr(rag_engine, 'flow_control'):
            # Simply pass the model string as-is, matching SmartStat's approach
            # The RAG engine will determine provider based on model name prefix
            if model.startswith('claude'):
                # Anthropic Claude models
                rag_engine.flow_control['model'] = ["Anthropic", model, 0]
                logger.info(f"Chat: Configured RAG engine with Anthropic model: {model}")
            elif model.startswith('azure') or model == 'OneCo-gpt':
                # Azure OpenAI models
                rag_engine.flow_control['model'] = ["Azure", "OneCo-gpt", 0, "2024-02-15-preview"]
                logger.info(f"Chat: Configured RAG engine with Azure model: {model}")
            elif model.startswith('gpt') or model.startswith('o1'):
                # OpenAI models (gpt-4o, gpt-4, o1-preview, etc.)
                rag_engine.flow_control['model'] = ["OpenAi", model, 0]
                logger.info(f"Chat: Configured RAG engine with OpenAI model: {model}")
            elif model.startswith('gemini'):
                # Google Gemini models
                rag_engine.flow_control['model'] = ["Google", model, 0]
                logger.info(f"Chat: Configured RAG engine with Google model: {model}")
            else:
                # Default to OpenAI with model as-is
                rag_engine.flow_control['model'] = ["OpenAi", model, 0]
                logger.info(f"Chat: Using model as OpenAI: {model}")
            # Log the actual flow_control model setting
            logger.info(f"Chat: RAG engine flow_control['model'] = {rag_engine.flow_control['model']}")
        else:
            logger.warning("Chat: flow_control not available on RAG engine")

        # Configure RAG engine with enhanced parameters
        if hasattr(rag_engine, 'flow_control'):
            # Log current configuration
            logger.info(f"Chat: Received configuration: {config}")
            rag_engine.flow_control.update({
                'enable_search': config.get('enable_search', True),
                'enable_web_search': config.get('enableWebSearch', False),
                'web_search_queries': config.get('webSearchQueries', 3),
                'web_search_cycles': config.get('webSearchCycles', 1),
                'enable_memory': config.get('enable_memory', True),
                'enable_extensive_search': config.get('enable_extensive_search', False),
                'extensive_search_chunks': config.get('extensive_chunks', 200),
                'target_summary_tokens': config.get('target_summary_tokens', config.get('summary_tokens', 5000)),
                'detail_level': config.get('detail_level', 'detailed'),
                'enable_keyword_filtering': config.get('enable_keyword_filtering', False),
                'enable_reference_filtering': config.get('enable_reference_filtering', True),
                'reference_threshold': config.get('reference_threshold', 0.3),
                'manual_keywords': ','.join(config.get('manual_keywords', [])) if isinstance(config.get('manual_keywords', []), list) else config.get('manual_keywords', ''),
                'detailed_instructions': config.get('detailed_instructions', '')
            })
            # Log applied configuration
            logger.info(f"Chat: Applied flow_control settings: enable_search={rag_engine.flow_control.get('enable_search')}, enable_memory={rag_engine.flow_control.get('enable_memory')}, enable_extensive_search={rag_engine.flow_control.get('enable_extensive_search')}")
        else:
            logger.warning("Chat: flow_control not available on RAG engine")

        # Set instruction template if provided
        template = config.get('instruction_template')
        detailed_instructions = config.get('detailed_instructions', '')
        if template and hasattr(rag_engine, 'set_instruction_template'):
            rag_engine.set_instruction_template(template)
        elif template and hasattr(rag_engine, 'current_instruction_template'):
            rag_engine.current_instruction_template = template

        # Apply detailed instructions if provided
        if detailed_instructions and hasattr(rag_engine, 'flow_control'):
            rag_engine.flow_control['detailed_instructions'] = detailed_instructions
            logger.info(f"Chat: Applied detailed instructions: {len(detailed_instructions)} characters")
        elif detailed_instructions:
            # For RAG engines that don't support flow_control, try other methods
            if hasattr(rag_engine, 'set_instructions'):
                rag_engine.set_instructions(detailed_instructions)
            elif hasattr(rag_engine, 'instructions'):
                rag_engine.instructions = detailed_instructions

        # Handle collections - clear and add selected collections
        collections = config.get('collections', [])
        if hasattr(rag_engine, 'data_handles'):
            try:
                # Clear existing data sources first
                rag_engine.data_handles.clear_data()
                logger.info("Chat: Cleared existing data sources")
                # Add selected collections (only if collections are selected)
                if collections:
                    for collection in collections:
                        available_collections = getattr(rag_engine, 'available_collections', [])
                        if collection in available_collections:
                            try:
                                # Configure processing steps based on user settings
                                processing_steps = ["similarity", "extend_query"]
                                # Add collection as chromaDB type
                                rag_engine.data_handles.add_data(
                                    name=f"Internal data store: {collection}",
                                    type="chromaDB",
                                    data=collection,
                                    filters="",
                                    processing_steps=processing_steps,
                                    inclusions=10,
                                    instructions=""
                                )
                                logger.info(f"Chat: ✅ Added ChromaDB collection: {collection} with processing steps: {processing_steps}")
                            except Exception as e:
                                logger.error(f"Chat: ❌ Failed to add collection {collection}: {e}")
                        else:
                            logger.warning(f"Chat: Collection '{collection}' not available")
                    logger.info(f"Chat: Added {len(collections)} collections to data sources")
                else:
                    logger.info("Chat: No collections selected - using all available data")
            except Exception as e:
                logger.error(f"Chat: Error handling collections: {e}")

        # Set up context with uploaded documents from the Flask session
        uploaded_docs_context = ""
        session_docs = session.get('documents', {})
        if session_docs:
            uploaded_docs_context = "\n\nUploaded Documents Context:\n"
            for filename, doc_info in session_docs.items():
                if 'content' in doc_info and doc_info['content']:
                    uploaded_docs_context += f"\n--- {filename} ---\n{doc_info['content'][:2000]}...\n"
            uploaded_docs_context += "\nPlease consider the uploaded documents when formulating your response.\n"

        full_message = section_context + uploaded_docs_context + message

        # Check if this is a multi-cycle web search that should run in background
        web_search_cycles = config.get('webSearchCycles', 1)
        is_multi_cycle = config.get('enableWebSearch', False) and web_search_cycles > 1

        if is_multi_cycle:
            # Run in background thread for long-running multi-cycle searches
            import uuid
            job_id = str(uuid.uuid4())

            # Initialize progress tracking
            text_chat_progress[job_id] = {
                'status': 'running',
                'progress': 0,
                'cycle': 0,
                'total_cycles': web_search_cycles,
                'message': 'Starting multi-cycle web search...',
                'session_id': session_id
            }

            # Run chat in background thread
            def run_chat_background():
                try:
                    # Progress callback function
                    def update_progress(progress_info):
                        text_chat_progress[job_id].update(progress_info)

                    text_chat_progress[job_id]['progress'] = 5
                    text_chat_progress[job_id]['message'] = 'Generating initial answer...'
                    raw_response = rag_engine.response_callback(full_message, progress_callback=update_progress)
                    text_chat_progress[job_id]['progress'] = 90
                    text_chat_progress[job_id]['message'] = 'Processing response...'

                    # Extract markdown from response - don't convert to HTML
                    if hasattr(raw_response, '_repr_markdown_'):
                        markdown_response = raw_response._repr_markdown_()
                    elif hasattr(raw_response, 'object'):
                        markdown_response = str(raw_response.object)
                    else:
                        markdown_response = str(raw_response)

                    # Send markdown as-is - frontend will handle HTML conversion
                    response = markdown_response

                    # Get references
                    available_references = []
                    if hasattr(rag_engine, 'get_available_references'):
                        try:
                            available_references = rag_engine.get_available_references(response_text=response)
                            logger.info(f"Chat: Retrieved {len(available_references)} references")
                        except Exception as e:
                            logger.warning(f"Could not retrieve references: {e}")

                    # Add assistant message to session
                    chat_session_service.add_message_to_session(session_id, 'assistant', response, available_references)

                    text_chat_progress[job_id]['status'] = 'completed'
                    text_chat_progress[job_id]['progress'] = 100
                    text_chat_progress[job_id]['message'] = 'Chat response completed'
                    text_chat_progress[job_id]['result'] = {
                        'response': response,  # Markdown only
                        'references': available_references,
                        'chat_session': chat_session.to_dict(),
                        'message': 'Response generated successfully'
                    }
                except Exception as e:
                    logger.error(f"Background chat error: {e}")
                    import traceback
                    traceback.print_exc()
                    text_chat_progress[job_id]['status'] = 'failed'
                    text_chat_progress[job_id]['progress'] = 0
                    text_chat_progress[job_id]['message'] = 'Chat failed'
                    text_chat_progress[job_id]['error'] = str(e)

            # Start background thread
            import threading
            thread = threading.Thread(target=run_chat_background)
            thread.daemon = True
            thread.start()

            # Return job ID immediately
            return jsonify({
                'success': True,
                'job_id': job_id,
                'message': 'Multi-cycle web search started in background'
            })

        # For single-cycle requests, run synchronously as before
        # Generate response
        try:
            raw_response = rag_engine.response_callback(full_message)

            # Extract markdown from response - don't convert to HTML
            if hasattr(raw_response, '_repr_markdown_'):
                markdown_response = raw_response._repr_markdown_()
            elif hasattr(raw_response, 'object'):
                markdown_response = str(raw_response.object)
            else:
                markdown_response = str(raw_response)

            # Send markdown as-is - frontend will handle HTML conversion
            response = markdown_response

            # Get references
            available_references = []
            if hasattr(rag_engine, 'get_available_references'):
                try:
                    available_references = rag_engine.get_available_references(response_text=response)
                    logger.info(f"Chat: Retrieved {len(available_references)} references")
                    # Log reference structure for debugging
                    for i, ref in enumerate(available_references[:3]):  # Log first 3 references
                        logger.info(f"Reference {i+1}: {ref}")
                except Exception as e:
                    logger.warning(f"Could not retrieve references: {e}")

            # Add assistant message to session
            chat_session_service.add_message_to_session(session_id, 'assistant', response, available_references)

            return jsonify({
                'response': response,  # Markdown only
                'references': available_references,
                'chat_session': chat_session.to_dict(),
                'message': 'Response generated successfully'
            })
        except Exception as e:
            logger.error(f"Chat response error: {e}")
            return jsonify({'error': f'Failed to generate response: {str(e)}'}), 500
    except Exception as e:
        logger.error(f"Send chat message error: {e}")
        return jsonify({'error': 'Failed to send message'}), 500
```
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| session_id | str | - | positional_or_keyword |
Parameter Details
session_id: String identifier for the chat session. Used to retrieve the existing chat session, associated document, and section context. Must correspond to a valid chat session in the database.
Return Value
Returns a Flask JSON response. On success: {'response': 'markdown-formatted AI response', 'references': [list of reference objects], 'chat_session': {session dict}, 'message': 'success message'} with status 200. For multi-cycle web searches: {'success': True, 'job_id': 'uuid', 'message': 'background job started'}. On error: {'error': 'error message'} with status 404 (not found), 400 (bad request), or 500 (server error).
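For illustration, the two success shapes look roughly like this (values are placeholders; the exact structure of reference objects depends on the RAG engine):
```python
# Synchronous (single-cycle) success body, HTTP 200:
sync_success = {
    'response': '## Key findings\n- ...',  # markdown; the frontend renders it
    'references': [],                      # reference objects from the RAG engine
    'chat_session': {},                    # serialized chat session dict
    'message': 'Response generated successfully',
}

# Multi-cycle web search accepted, HTTP 200:
background_accepted = {
    'success': True,
    'job_id': '7f3b2c1e-...',              # use this ID to poll server-side progress
    'message': 'Multi-cycle web search started in background',
}
```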
Dependencies
flask, logging, uuid, threading, json, traceback, models, services, hybrid_rag_engine
Required Imports
```python
from flask import Flask, request, jsonify, session
import logging
import uuid
import threading
from models import ChatSession
from services import ChatSessionService, DocumentService, TextSectionService
from hybrid_rag_engine import OneCo_hybrid_RAG
```
Conditional/Optional Imports
These imports are only needed under specific conditions:
- import uuid - Required (conditional). Condition: only when multi-cycle web search is enabled (webSearchCycles > 1)
- import threading - Required (conditional). Condition: only when multi-cycle web search is enabled, for background processing
- import traceback - Required (conditional). Condition: only for error handling in background threads
Usage Example
```python
# Example POST request to the endpoint
import requests

session_id = 'abc123-session-id'
url = f'http://localhost:5000/api/chat-sessions/{session_id}/messages'

payload = {
    'message': 'What are the key findings in this document?',
    'model': 'gpt-4o',
    'config': {
        'enable_search': True,
        'enableWebSearch': False,
        'enable_memory': True,
        'enable_extensive_search': False,
        'collections': ['research_papers', 'technical_docs'],
        'detail_level': 'detailed',
        'instruction_template': 'analytical',
        'detailed_instructions': 'Focus on quantitative results'
    },
    'context_documents': []
}

response = requests.post(url, json=payload)
data = response.json()
if response.status_code != 200:
    print(f"Error: {data.get('error')}")
elif 'job_id' in data:
    # Multi-cycle web search started in background; poll for the result
    print(f"Background job started: {data['job_id']}")
else:
    print(f"AI Response: {data['response']}")
    print(f"References: {len(data['references'])} found")
```
Best Practices
- Always validate that the chat session, document, and section exist before processing messages
- Ensure RAG_AVAILABLE flag is checked before attempting to use the RAG engine
- For multi-cycle web searches (webSearchCycles > 1), use background processing to avoid request timeouts
- Clear existing ChromaDB collections before adding new ones to avoid context pollution
- Pass model selection strings directly to RAG engine flow_control matching the provider prefix (claude-, gpt-/o1-, azure-, gemini-)
- Return markdown responses without HTML conversion - let the frontend handle rendering
- Always attempt to extract references using get_available_references() after generating responses
- Log all configuration changes to flow_control for debugging model and search settings
- Handle uploaded documents from Flask session and include them in context
- Use progress callbacks for long-running operations to provide user feedback
- Store both user and assistant messages in the chat session for conversation history
- Validate that message content is not empty before processing
- Use try-except blocks around RAG engine calls to handle provider-specific errors gracefully
- For background jobs, initialize progress tracking before starting the thread (see the sketch after this list)
- Set thread.daemon = True for background threads to prevent blocking application shutdown
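A condensed sketch of the background-job pattern from the last two practices above. start_background_job is a hypothetical helper; the endpoint inlines this logic, but the job-store name matches the source:
```python
import threading
import uuid

text_chat_progress = {}  # module-level job store, as in the source

def start_background_job(work_fn):
    """Initialize progress tracking, then run work_fn on a daemon thread."""
    job_id = str(uuid.uuid4())
    # Initialize tracking BEFORE starting the thread, so a fast poller
    # never sees a missing job_id.
    text_chat_progress[job_id] = {'status': 'running', 'progress': 0}

    def runner():
        try:
            # work_fn receives a progress callback that merges updates into the store
            result = work_fn(lambda info: text_chat_progress[job_id].update(info))
            text_chat_progress[job_id].update(
                {'status': 'completed', 'progress': 100, 'result': result})
        except Exception as e:
            text_chat_progress[job_id].update(
                {'status': 'failed', 'progress': 0, 'error': str(e)})

    thread = threading.Thread(target=runner)
    thread.daemon = True  # don't block interpreter shutdown
    thread.start()
    return job_id
```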
Similar Components
AI-powered semantic similarity - components with related functionality:
- function api_send_chat_message (91.6% similar)
- function api_chat (87.6% similar)
- function chat (84.7% similar)
- function chat_with_text_section (76.1% similar)
- function process_chat_background (69.8% similar)