function api_chat
Flask API endpoint that handles chat requests asynchronously, processing user queries through a RAG (Retrieval-Augmented Generation) engine with support for multiple modes, memory, web search, and custom configurations.
File: /tf/active/vicechatdev/docchat/app.py
Lines: 962-1035
Complexity: complex
Purpose
This endpoint serves as the main chat interface for a document-based conversational AI system. It accepts user messages; processes them in one of three modes (basic, extensive, or full_reading); manages chat history with optional memory; and supports source filtering, manual keyword search, and web search. All processing runs asynchronously in background threads, so long-running operations do not block the API response.
Source Code
def api_chat():
    """Handle chat requests with async support for long-running modes"""
    try:
        data = request.json
        logger.info(f"🔍 RAW REQUEST DATA: {data}")

        query = data.get('message', '').strip()
        mode = data.get('mode', 'basic')
        model = data.get('model', config.DEFAULT_MODEL)
        output_language = data.get('output_language', 'en')
        session_id = session.get('session_id')

        # Get configuration options
        config_opts = data.get('config', {})
        logger.info(f"🔍 EXTRACTED CONFIG_OPTS: {config_opts}")
        logger.info(f"🔍 override_sources in config_opts: {config_opts.get('override_sources', 'KEY NOT FOUND')}")
        enable_memory = config_opts.get('enable_memory', True)
        enable_web_search = config_opts.get('enable_web_search', False)
        source_filters = config_opts.get('source_filters', [])  # Selected files/folders
        manual_keywords = config_opts.get('manual_keywords', [])  # Manual search keywords
        custom_instructions = config_opts.get('custom_instructions', '')  # Custom instructions for LLM

        # Add output_language to config_opts for background processing
        config_opts['output_language'] = output_language

        if not query:
            return jsonify({'error': 'Empty query'}), 400
        if not rag_engine:
            return jsonify({'error': 'RAG engine not initialized'}), 500

        # Get chat history
        chat_session = get_or_create_session(session_id)
        full_chat_history = chat_session['messages'] if enable_memory else []

        # Clean chat history for LLM context (remove metadata and references, keep only role and content)
        # The full history with metadata/references is saved for UI restoration
        chat_history = [
            {'role': msg['role'], 'content': msg['content']}
            for msg in full_chat_history
        ] if enable_memory else []

        # Add user message to history
        add_message_to_session(session_id, 'user', query)

        # ALL modes now use async processing for better UX with progress indicators
        # Start background processing
        task_id = str(uuid_module.uuid4())
        create_task(task_id, session_id)
        thread = threading.Thread(
            target=process_chat_background,
            args=(task_id, query, mode, model, chat_history, session_id, config_opts, source_filters, manual_keywords),
            daemon=True
        )
        thread.start()

        # Provide estimated time based on mode
        if mode == 'full_reading':
            estimated_time = "5-10 minutes"
        elif mode == 'extensive':
            estimated_time = "1-3 minutes"
        else:  # basic mode
            estimated_time = "10-30 seconds"

        return jsonify({
            'task_id': task_id,
            'status': 'processing',
            'message': f'Processing {mode} mode...',
            'estimated_time': estimated_time
        })
    except Exception as e:
        logger.error(f"API error: {e}")
        return jsonify({'error': 'Internal server error'}), 500
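The endpoint depends on two helpers defined elsewhere in app.py and not shown above: create_task(task_id, session_id), which registers the task so its status can be polled, and process_chat_background(...), which performs the actual RAG work and stores the result. As a minimal, hypothetical sketch of that pattern (the field names and the update_task helper are assumptions, not the app's actual code), the task registry could be a lock-protected dict:

from threading import Lock

tasks = {}
tasks_lock = Lock()

def create_task(task_id, session_id):
    """Register a new background task so clients can poll its status."""
    with tasks_lock:
        tasks[task_id] = {'session_id': session_id, 'status': 'processing', 'result': None}

def update_task(task_id, status, result=None):
    """Hypothetical helper process_chat_background would call when done or on failure."""
    with tasks_lock:
        if task_id in tasks:
            tasks[task_id].update(status=status, result=result)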
Return Value
Returns immediately with HTTP 200 and a JSON body containing: 'task_id' (UUID string for tracking the async task), 'status' (always 'processing'), 'message' (a description of the current processing mode), and 'estimated_time' (a string indicating expected completion time for that mode). The response is sent before processing completes; results must be fetched by polling. On error, returns JSON with an 'error' key and an appropriate HTTP status code (400 for an empty query; 500 if the RAG engine is not initialized or an unexpected exception occurs).
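For reference, a successful response body looks like this (the task_id placeholder stands in for a real UUID):

{
    "task_id": "<uuid4 string>",
    "status": "processing",
    "message": "Processing extensive mode...",
    "estimated_time": "1-3 minutes"
}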
Dependencies
flask, uuid, threading, logging, pathlib, werkzeug, functools, datetime, json, os, time, python-docx, reportlab, io, traceback
Required Imports
from flask import Flask, render_template, request, jsonify, session, redirect, url_for, send_file
import os
import json
import logging
from datetime import datetime, timedelta
import uuid as uuid_module
from pathlib import Path
import time
import threading
from threading import Lock
from werkzeug.utils import secure_filename
from functools import wraps
import config
from rag_engine import DocChatRAG
from document_indexer import DocumentIndexer
from auth.azure_auth import setup_azure_sso, validate_azure_token
from docx import Document
from docx.shared import Inches, Pt, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from io import BytesIO
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
from reportlab.lib.colors import HexColor
import traceback
Usage Example
# Example client-side request to this endpoint
import requests

# Assuming Flask app is running on localhost:5000
url = 'http://localhost:5000/api/chat'

# Login first to get session cookie
session = requests.Session()
# ... perform login ...

# Make chat request
payload = {
    'message': 'What are the key findings in the Q4 report?',
    'mode': 'extensive',
    'model': 'gpt-4',
    'output_language': 'en',
    'config': {
        'enable_memory': True,
        'enable_web_search': False,
        'source_filters': ['reports/Q4_2023.pdf'],
        'manual_keywords': ['revenue', 'growth'],
        'custom_instructions': 'Focus on financial metrics'
    }
}
response = session.post(url, json=payload)
result = response.json()

# Result contains task_id for polling status
task_id = result['task_id']
print(f"Task started: {task_id}")
print(f"Estimated time: {result['estimated_time']}")

# Poll for completion (separate endpoint needed)
# GET /api/task/{task_id} to check status
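Since the response returns before processing finishes, the client has to poll. A minimal polling loop, continuing from the example above, could look like the following; the /api/task/{task_id} path comes from the comment above, and the 'status'/'result' field names are assumptions about that endpoint's response schema:

import time

# Hypothetical polling loop; the 'status' and 'result' keys are assumed.
while True:
    status_response = session.get(f'http://localhost:5000/api/task/{task_id}')
    status = status_response.json()
    if status.get('status') != 'processing':
        break
    time.sleep(5)  # wait between checks to avoid hammering the server

print(status.get('result'))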
Best Practices
- Always validate that rag_engine is initialized before processing requests
- Use the returned task_id to poll for completion status via a separate endpoint
- Implement proper error handling on the client side for 400 and 500 status codes
- Consider rate limiting this endpoint as it spawns background threads
- Ensure session management is properly configured with secure session keys
- The function spawns daemon threads, which are terminated abruptly when the main process exits, so in-flight tasks are lost on shutdown
- Chat history is cleaned (metadata removed) before passing to LLM but full history is preserved for UI
- Different modes have significantly different processing times - inform users appropriately
- The endpoint requires authentication via login_required decorator
- Source filters and manual keywords can be combined for more precise document retrieval
- Custom instructions allow fine-tuning LLM behavior per request
- Memory can be disabled for stateless queries to improve performance
- Monitor the number of live background threads in production to prevent resource exhaustion; see the sketch below for one way to bound concurrency
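On the last two points (rate limiting and background-thread growth): api_chat currently spawns one unbounded daemon thread per request. One way to cap concurrency, sketched here as a suggestion rather than the app's actual design, is a shared ThreadPoolExecutor (MAX_CHAT_WORKERS is a hypothetical setting):

from concurrent.futures import ThreadPoolExecutor

# A shared, bounded worker pool instead of one unbounded thread per request.
# MAX_CHAT_WORKERS is a hypothetical setting; tune it for your deployment.
MAX_CHAT_WORKERS = 8
chat_executor = ThreadPoolExecutor(max_workers=MAX_CHAT_WORKERS)

# Inside api_chat(), replace threading.Thread(...).start() with:
chat_executor.submit(
    process_chat_background,
    task_id, query, mode, model, chat_history,
    session_id, config_opts, source_filters, manual_keywords,
)

Excess requests then queue inside the pool instead of creating new threads, which gives a crude form of backpressure; proper per-client rate limiting would still need to happen at the Flask layer.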
Similar Components
AI-powered semantic similarity - components with related functionality:
- function chat (88.4% similar)
- function api_send_chat_message_v1 (87.6% similar)
- function api_send_chat_message (86.7% similar)
- function process_chat_background (76.4% similar)
- function analysis_chat (70.6% similar)