class StatisticalAgent
LLM-powered statistical analysis agent
Source file: /tf/active/vicechatdev/vice_ai/statistical_agent.py
Lines: 32-1150
moderate
Purpose
LLM-powered statistical analysis agent that interprets natural-language analysis requests, generates standalone Python analysis scripts (plus a matching requirements.txt), iteratively debugs failing scripts, and produces plain-language interpretations of the results using OpenAI, Azure OpenAI, Gemini, or Anthropic models.
Source Code
class StatisticalAgent:
"""LLM-powered statistical analysis agent"""
def __init__(self, config):
self.config = config
self.setup_llm_clients()
def setup_llm_clients(self):
"""Initialize LLM clients following your existing pattern"""
# OpenAI client
if OPENAI_AVAILABLE and self.config.OPENAI_API_KEY:
self.openai_client = openai.OpenAI(api_key=self.config.OPENAI_API_KEY)
else:
self.openai_client = None
# Azure OpenAI client
if OPENAI_AVAILABLE and self.config.AZURE_OPENAI_API_KEY:
self.azure_client = openai.AzureOpenAI(
azure_endpoint=self.config.AZURE_OPENAI_ENDPOINT,
api_key=self.config.AZURE_OPENAI_API_KEY,
api_version="2024-02-15-preview"
)
else:
self.azure_client = None
# Gemini client
if GEMINI_AVAILABLE and self.config.GEMINI_API_KEY:
genai.configure(api_key=self.config.GEMINI_API_KEY)
self.gemini_client = genai.GenerativeModel('gemini-pro')
else:
self.gemini_client = None
def interpret_user_query(self, user_query: str, data_summary: Dict[str, Any],
available_columns: List[str], model: str = 'gpt-4o') -> Dict[str, Any]:
"""
Interpret natural language query and suggest analysis configuration
"""
prompt = self._build_interpretation_prompt(user_query, data_summary, available_columns)
try:
response = self._call_llm(prompt, model=model, max_tokens=1000)
# Parse the structured response
analysis_plan = self._parse_analysis_plan(response)
return {
'success': True,
'analysis_plan': analysis_plan,
'suggested_config': self._build_analysis_config(analysis_plan),
'interpretation': response,
'confidence': analysis_plan.get('confidence', 0.8)
}
except Exception as e:
logger.error(f"Error interpreting user query: {str(e)}")
return {
'success': False,
'error': str(e),
'fallback_suggestions': self._get_fallback_suggestions(user_query, available_columns)
}
def generate_analysis_script(self, analysis_config: AnalysisConfiguration,
data_summary: Dict[str, Any],
user_query: str,
model: str = 'gpt-4o',
previous_context: Dict[str, Any] = None) -> Dict[str, Any]:
"""
Generate Python script for statistical analysis with optional previous context
"""
prompt = self._build_script_generation_prompt(analysis_config, data_summary, user_query, previous_context)
# DEBUG: Log the prompt to see what's being sent to LLM
logger.info("=" * 80)
logger.info("PROMPT SENT TO LLM:")
logger.info("=" * 80)
logger.info(prompt[:2000]) # Log first 2000 chars
logger.info("...")
logger.info("=" * 80)
try:
# Increased from 2000 to 8000 to avoid truncation of complex scripts
response = self._call_llm(prompt, model=model, max_tokens=8000)
# Extract script and explanation
script, explanation = self._parse_script_response(response)
# ROBUST SOLUTION: Always wrap script with mandatory components
# This ensures JSON saving happens even if LLM forgets
script = self._ensure_script_completeness(script)
return {
'success': True,
'script': script,
'explanation': explanation,
'estimated_runtime': self._estimate_runtime(script),
'required_libraries': self._extract_required_libraries(script)
}
except Exception as e:
logger.error(f"Error generating analysis script: {str(e)}")
return {
'success': False,
'error': str(e),
'fallback_script': self._generate_basic_script(analysis_config)
}
def generate_requirements_txt(self, analysis_config: AnalysisConfiguration,
data_summary: Dict[str, Any],
user_query: str, model: str = 'gpt-4o') -> Dict[str, Any]:
"""
Generate requirements.txt for the analysis script
"""
prompt = self._build_requirements_prompt(analysis_config, data_summary, user_query)
try:
response = self._call_llm(prompt, model=model, max_tokens=800)
# Extract requirements and explanation
requirements, explanation = self._parse_requirements_response(response)
return {
'success': True,
'requirements': requirements,
'explanation': explanation
}
except Exception as e:
logger.error(f"Error generating requirements.txt: {str(e)}")
return {
'success': False,
'error': str(e),
'fallback_requirements': self._get_default_requirements()
}
def debug_script_error(self, script: str, error_message: str,
data_summary: Dict[str, Any], iteration: int = 1, model: str = 'gpt-4o') -> Dict[str, Any]:
"""
Debug script errors using LLM (agent-mode debugging)
"""
prompt = self._build_debugging_prompt(script, error_message, data_summary, iteration)
try:
response = self._call_llm(prompt, model=model, max_tokens=1500)
# Parse debugging response
fixed_script, explanation = self._parse_debug_response(response)
return {
'success': True,
'fixed_script': fixed_script,
'explanation': explanation,
'debug_iteration': iteration,
'confidence': 0.9 - (iteration * 0.1) # Decreasing confidence with iterations
}
except Exception as e:
logger.error(f"Error debugging script: {str(e)}")
return {
'success': False,
'error': str(e),
'iteration': iteration
}
def interpret_results(self, results: Dict[str, Any], analysis_config: AnalysisConfiguration,
user_query: str, model: str = 'gpt-4o', info_sheets: Dict[str, str] = None,
interpretation_template: str = None) -> Dict[str, Any]:
"""
Generate interpretation and insights from analysis results
"""
prompt = self._build_interpretation_results_prompt(results, analysis_config, user_query, info_sheets, interpretation_template)
try:
# Increased from 1500 to 8000 tokens to allow comprehensive interpretations
# Statistical analysis interpretations often need more space for detailed findings,
# assumptions checks, normality tests, post-hoc comparisons, etc.
response = self._call_llm(prompt, model=model, max_tokens=8000)
return {
'success': True,
'interpretation': response,
'key_findings': self._extract_key_findings(response),
'recommendations': self._extract_recommendations(response)
}
except Exception as e:
logger.error(f"Error interpreting results: {str(e)}")
return {
'success': False,
'error': str(e),
'basic_interpretation': self._generate_basic_interpretation(results)
}
def _build_interpretation_prompt(self, user_query: str, data_summary: Dict[str, Any],
available_columns: List[str]) -> str:
"""Build prompt for query interpretation"""
return f"""You are a statistical analysis expert. Analyze the user's request and suggest the most appropriate statistical analysis.
USER QUERY: "{user_query}"
DATASET INFORMATION:
- Shape: {data_summary.get('shape', 'Unknown')}
- Available columns: {', '.join(available_columns)}
- Column details: {json.dumps(data_summary.get('column_info', {}), indent=2)}
TASK: Interpret the user's request and provide a structured analysis plan in JSON format:
{{
"analysis_type": "descriptive|hypothesis_test|regression|correlation|anova|control_chart|custom",
"target_variables": ["list", "of", "target", "variables"],
"grouping_variables": ["list", "of", "grouping", "variables"],
"statistical_tests": ["list", "of", "specific", "tests"],
"assumptions_to_check": ["normality", "homogeneity", "independence", "etc"],
"significance_level": 0.05,
"description": "Detailed description of the analysis plan",
"confidence": 0.9,
"warnings": ["any", "potential", "issues"]
}}
Focus on:
1. Descriptive statistics and hypothesis testing
2. Control chart analysis for quality data
3. Checking statistical assumptions
4. Appropriate test selection based on data types and distribution
Provide only the JSON response."""
def _build_script_generation_prompt(self, analysis_config: AnalysisConfiguration,
data_summary: Dict[str, Any], user_query: str,
previous_context: Dict[str, Any] = None) -> str:
"""Build prompt for Python script generation with optional previous context"""
# Build the base prompt
prompt_parts = []
prompt_parts.append("""You are a Python statistical programming expert. Generate a complete Python script for statistical analysis.
🔴 CRITICAL REQUIREMENT 🔴
Your script MUST end by saving results to 'analysis_results.json' file.
Without this file, the analysis will FAIL and interpretation cannot be generated.
Every script you generate MUST include the JSON save code at the end.
""")
# Add previous context if available
if previous_context and previous_context.get('count', 0) > 0:
context_type = previous_context.get('type', 'previous')
prompt_parts.append(f"\nPREVIOUS ANALYSIS CONTEXT:")
if context_type == 'selected':
prompt_parts.append(f"This is a follow-up analysis. The user has specifically selected {previous_context['count']} previous analyses to reference:")
else:
prompt_parts.append(f"This is a follow-up analysis in an iterative session. Here's what was done previously:")
prompt_parts.append(previous_context['summary'])
# Include detailed context from the most recent previous analysis
if previous_context['analyses']:
recent_analysis = previous_context['analyses'][-1] # Most recent
prompt_parts.append(f"\nMOST RECENT ANALYSIS DETAILS:")
prompt_parts.append(f"Query: {recent_analysis.get('user_query', 'Unknown')}")
if recent_analysis.get('conclusions'):
prompt_parts.append(f"Previous conclusions: {recent_analysis['conclusions'][:500]}...")
if recent_analysis.get('results_summary'):
prompt_parts.append(f"Previous results files generated:")
for result in recent_analysis['results_summary'][:3]: # Limit to first 3 files
prompt_parts.append(f"- {result['filename']}: {result['preview'][:200]}...")
if recent_analysis.get('script_content'):
# Include key parts of previous script for reference
script_lines = recent_analysis['script_content'].split('\n')
imports_and_key_code = []
for line in script_lines[:50]: # First 50 lines typically contain imports and key setup
if line.strip().startswith(('import ', 'from ', 'def ', '# Key')) or 'matplotlib' in line or 'seaborn' in line:
imports_and_key_code.append(line)
if imports_and_key_code:
prompt_parts.append(f"\nPrevious script key components (for reference):")
prompt_parts.append('\n'.join(imports_and_key_code[:20])) # Limit lines
prompt_parts.append(f"\nIMPORTANT: Build upon previous work. Reference previous findings. Avoid repeating exactly the same analysis unless specifically requested.")
prompt_parts.append(f"\nCURRENT REQUEST: \"{user_query}\"")
# Build the base prompt first (but don't return yet!)
prompt = f"""{chr(10).join(prompt_parts)}
ANALYSIS CONFIGURATION:
- Type: {analysis_config.analysis_type.value}
- Target variables: {analysis_config.target_variables}
- Grouping variables: {analysis_config.grouping_variables}
- Significance level: {analysis_config.significance_level}
DATASET INFO:
"""
# Handle multi-dataset or single dataset
summary_type = data_summary.get('type', 'NONE')
datasets_count = len(data_summary.get('datasets', {}))
print(f"\n{'='*80}")
print(f"STATISTICAL_AGENT DEBUG:")
print(f" data_summary['type'] = '{summary_type}'")
print(f" len(data_summary.get('datasets')) = {datasets_count}")
print(f" Condition check: data_summary.get('type') == 'multi-dataset' ? {summary_type == 'multi-dataset'}")
print(f"{'='*80}\n")
logger.info(f"DEBUG: data_summary type = {summary_type}, has datasets = {datasets_count}")
if data_summary.get('type') == 'multi-dataset':
prompt += f"""
This session has MULTIPLE DATASETS ({data_summary.get('dataset_count')} total):
"""
# Build list of dataset loading instructions
dataset_load_commands = []
logger.info(f"Processing {len(data_summary.get('datasets', {}))} datasets for prompt")
for dataset_name, dataset_info in data_summary.get('datasets', {}).items():
# Get the actual CSV filename from data_summary (calculated in smartstat_service.py)
csv_filename = dataset_info.get('csv_filename', 'data.csv')
logger.info(f"Dataset '{dataset_name}' → CSV file: '{csv_filename}'")
# Create variable name from filename (without .csv)
var_name = csv_filename.replace('.csv', '')
prompt += f"""
Dataset: '{dataset_name}'
- Load from file: {csv_filename}
- Rows: {dataset_info.get('rows')}
- Columns: {dataset_info.get('columns')}
- Column names (USE EXACT NAMES): {dataset_info.get('column_names', [])}
- Column types: {json.dumps(dataset_info.get('dtypes', {}), indent=2)}
- Numeric columns: {dataset_info.get('numeric_columns', [])}
"""
dataset_load_commands.append(f"{var_name} = pd.read_csv('{csv_filename}')")
# Add information sheets context if available
info_sheets = data_summary.get('info_sheets', {})
logger.info(f"DEBUG: info_sheets keys = {list(info_sheets.keys()) if info_sheets else 'None'}")
if info_sheets:
logger.info(f"Adding {len(info_sheets)} information sheets to prompt")
prompt += f"""
ADDITIONAL CONTEXT FROM INFORMATION SHEETS:
The following information sheets provide study context and metadata:
"""
for sheet_name, context in info_sheets.items():
logger.info(f" Info sheet '{sheet_name}': {len(context)} characters")
# Include full context (up to 2000 chars) for better understanding
truncated_context = context[:2000] if len(context) > 2000 else context
truncation_note = "... (truncated for token limits)" if len(context) > 2000 else ""
prompt += f"""
--- {sheet_name} ---
{truncated_context}{truncation_note}
"""
else:
logger.info("No information sheets to add to prompt")
prompt += f"""
MULTI-DATASET LOADING INSTRUCTIONS:
1. Load each dataset using these EXACT commands:
{chr(10).join(' ' + cmd for cmd in dataset_load_commands)}
2. You can analyze each dataset separately or combine them if appropriate
3. If combining datasets, consider using pd.concat() or pd.merge() depending on the relationship
4. CRITICAL: Use the EXACT column names from each dataset - DO NOT rename or convert them
5. ⚠️ ABSOLUTELY CRITICAL: The CSV files are already present in the working directory
6. ⚠️ IF FILE LOADING FAILS: Print a clear error message and EXIT immediately - DO NOT create dummy/sample data
7. ⚠️ NEVER create sample datasets, fabricated data, or placeholder data - this is real research data
8. Use the contextual information from information sheets to better understand the study design and interpret results
"""
else:
# Single dataset
prompt += f"""
- Rows: {data_summary.get('rows', 'Unknown')}
- Columns: {data_summary.get('columns', 'Unknown')}
- Column names (USE THESE EXACT NAMES): {data_summary.get('column_names', [])}
- Column types: {json.dumps(data_summary.get('dtypes', {}), indent=2)}
- Numeric columns: {data_summary.get('numeric_columns', [])}
"""
prompt += """
REQUIREMENTS:
1. MUST include all import statements (pandas, numpy, matplotlib, etc.) - this is a standalone script
2. MUST load data using pd.read_csv() from the appropriate CSV file(s)
3. CRITICAL COLUMN NAMES: Use the EXACT column names from the dataset info above - DO NOT rename or convert them (e.g., if the column is 'Egg Weight (g)', use exactly that, not 'egg_weight')
4. When accessing columns with spaces or special characters, use bracket notation: df['Egg Weight (g)'] not df.egg_weight
5. **CRITICAL - MISSING VALUES**: Properly handle missing/NaN values in all analyses:
- Use .dropna() when calculating statistics on columns with missing values
- For group comparisons, remove NaN before analysis: df.groupby('group')['value'].apply(lambda x: x.dropna())
- For plotting, filter out NaN values to avoid display issues: df[df['column'].notna()]
- Document how many values were excluded due to missing data
- NEVER include NaN in group counts or statistical calculations
6. Include proper error handling and data validation (check for NaN values, ensure numeric columns are properly typed)
7. Generate clear plots with proper titles and labels (include sample sizes: n=X)
8. Store ONLY statistical results and data in the results dictionary - NO INTERPRETATION TEXT
9. Include statistical assumptions checking
10. Add comments explaining each step
11. Save plots to files using plt.savefig() and store filenames in results['plots']
12. At the end, save results dictionary to 'analysis_results.json' file
13. **CRITICAL**: Do NOT generate interpretation or conclusion text in the script - this will be done separately by a different AI model
EXECUTION ENVIRONMENT:
- Script runs standalone via subprocess in a virtual environment
- Data is provided in CSV file(s) (same directory as script)
- Script must be completely self-contained with all imports
- Results must be saved to 'analysis_results.json' file
SCRIPT STRUCTURE EXAMPLE:
```python
# Import all required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import warnings
import json
warnings.filterwarnings('ignore')
# Initialize results dictionary
results = {{}}
# Load data from CSV file
df = pd.read_csv('data.csv')
# CRITICAL: Handle missing values properly
# Example - Group comparison with missing values
numeric_col = 'Egg weight'
group_col = 'Farm'
# Remove NaN values before analysis
df_clean = df[[group_col, numeric_col]].dropna()
# Calculate statistics per group
group_stats = df_clean.groupby(group_col)[numeric_col].agg([
('mean', 'mean'),
('std', 'std'),
('n', 'count'), # Use count to get actual sample sizes
('min', 'min'),
('max', 'max')
]).round(2)
# For plotting, use clean data
plt.figure(figsize=(10, 6))
for group in df_clean[group_col].unique():
group_data = df_clean[df_clean[group_col] == group][numeric_col]
n = len(group_data)
plt.hist(group_data, alpha=0.5, label=f'{{group}} (n={{n}})')
plt.xlabel(numeric_col)
plt.ylabel('Frequency')
plt.title(f'Distribution of {{numeric_col}} by {{group_col}}')
plt.legend()
plt.savefig('plot1.png', dpi=300, bbox_inches='tight')
results['plots'] = ['plot1.png']
plt.close()
# Store ONLY numeric/statistical results - NO text interpretation
results['summary_statistics'] = group_stats.to_dict()
results['missing_values_excluded'] = len(df) - len(df_clean)
results['test_results'] = {{
'statistic': 2.45,
'p_value': 0.014,
'test_name': 't-test'
}}
```
# DO NOT add results['interpretation'] or results['conclusions'] - this is handled separately
# Save plots and track filenames
results['plots'] = []
plt.figure()
# ... create plot ...
plt.savefig('plot1.png')
results['plots'].append('plot1.png')
plt.close()
# CRITICAL: Clean NaN/Infinity values before saving JSON
# Replace NaN and Inf with None to ensure valid JSON
import math
def clean_for_json(obj):
if isinstance(obj, dict):
return {{k: clean_for_json(v) for k, v in obj.items()}}
elif isinstance(obj, list):
return [clean_for_json(item) for item in obj]
elif isinstance(obj, float):
if math.isnan(obj) or math.isinf(obj):
return None
return obj
return obj
results = clean_for_json(results)
# Save results to JSON file
with open('analysis_results.json', 'w') as f:
json.dump(results, f, indent=2, default=str)
print("Analysis complete - results saved to analysis_results.json")
```
**MANDATORY FINAL STEPS** - Your script MUST end with these exact lines:
```python
# MANDATORY: Clean and save results to JSON
import math
def clean_for_json(obj):
if isinstance(obj, dict):
return {{k: clean_for_json(v) for k, v in obj.items()}}
elif isinstance(obj, list):
return [clean_for_json(item) for item in obj]
elif isinstance(obj, float):
if math.isnan(obj) or math.isinf(obj):
return None
return obj
return obj
results = clean_for_json(results)
with open('analysis_results.json', 'w') as f:
json.dump(results, f, indent=2, default=str)
print("Analysis complete - results saved to analysis_results.json")
```
**CRITICAL REQUIREMENTS**:
- ✅ MUST save results to 'analysis_results.json' - THIS IS MANDATORY
- ✅ MUST include the clean_for_json function to handle NaN/Inf values
- ✅ MUST use json.dump() with indent=2 and default=str
- ✅ Focus on DATA and STATISTICS only - NO textual interpretation
- ✅ All interpretation will be generated separately from the JSON results
- ❌ DO NOT add results['interpretation'] or results['conclusions'] fields
**SCRIPT VALIDATION CHECKLIST** - Every script must have:
1. ✓ Import json at the top
2. ✓ Initialize results = {{}} dictionary early
3. ✓ Add statistical results to results dictionary
4. ✓ Add plot filenames to results['plots']
5. ✓ Include clean_for_json() function
6. ✓ Call clean_for_json(results) before saving
7. ✓ Save with open('analysis_results.json', 'w') and json.dump()
8. ✓ Print confirmation message
Provide the complete Python script with detailed comments. ENSURE it ends with the JSON save code shown above.
"""
# CRITICAL: Return the completed prompt!
return prompt
def _build_debugging_prompt(self, script: str, error_message: str,
data_summary: Dict[str, Any], iteration: int) -> str:
"""Build prompt for script debugging"""
return f"""You are a Python debugging expert. Fix the error in this statistical analysis script.
ORIGINAL SCRIPT:
```python
{script}
```
ERROR MESSAGE:
{error_message}
DATASET INFO:
- Rows: {data_summary.get('rows', 'Unknown')}
- Columns: {data_summary.get('columns', 'Unknown')}
- Available column names (USE EXACT NAMES): {data_summary.get('column_names', [])}
- Column types: {json.dumps(data_summary.get('dtypes', {}), indent=2)}
DEBUG ITERATION: {iteration}
REQUIREMENTS:
1. Identify the root cause of the error
2. Provide a fixed version of the script
3. Explain what was wrong and how you fixed it
4. Ensure the fix maintains the original analysis intent
5. Add additional error handling if needed
6. Remember: Script must be standalone with all imports
7. Data is loaded from 'data.csv' file
8. Results must be saved to 'analysis_results.json'
9. CRITICAL: Use EXACT column names from the dataset (with spaces, parentheses, capitals as shown above)
10. Use bracket notation for column access: df['Column Name'] not df.column_name
RESPONSE FORMAT:
EXPLANATION: [Explain the error and your fix]
FIXED SCRIPT:
```python
[Your fixed Python code here - complete standalone script with imports]
```
Focus on common issues:
- Column name mismatches (MUST use exact names: 'Egg Weight (g)' not 'egg_weight' or 'Egg_Weight_g')
- Using dot notation instead of bracket notation for columns with spaces/special chars
- Data type problems
- Missing data handling
- Library import errors
- Statistical test requirements
- Data loading from 'data.csv'
- Results saving to 'analysis_results.json'"""
def _build_interpretation_results_prompt(self, results: Dict[str, Any],
analysis_config: AnalysisConfiguration,
user_query: str,
info_sheets: Dict[str, str] = None,
interpretation_template: str = None) -> str:
"""Build prompt for results interpretation"""
# Build base prompt
prompt = f"""You are a statistical analyst. Interpret these analysis results for a non-technical audience.
USER QUERY: "{user_query}"
ANALYSIS TYPE: {analysis_config.analysis_type.value}
SIGNIFICANCE LEVEL: {analysis_config.significance_level}
RESULTS:
{json.dumps(results, indent=2, default=str)}
"""
# Add information sheets context if available
if info_sheets:
logger.info(f"Adding {len(info_sheets)} information sheets to interpretation prompt")
prompt += f"""
ADDITIONAL CONTEXT FROM STUDY INFORMATION:
The following information sheets provide important study context for interpreting results:
"""
for sheet_name, context in info_sheets.items():
logger.info(f" Info sheet '{sheet_name}': {len(context)} characters")
# Include full context (up to 2000 chars) for better understanding
truncated_context = context[:2000] if len(context) > 2000 else context
truncation_note = "... (truncated for token limits)" if len(context) > 2000 else ""
prompt += f"""
--- {sheet_name} ---
{truncated_context}{truncation_note}
"""
# Add interpretation instructions (template or default)
if interpretation_template:
logger.info("Using custom interpretation template")
prompt += f"""
INTERPRETATION INSTRUCTIONS:
{interpretation_template}
"""
else:
# Default interpretation instructions
prompt += """
TASK: Provide a clear, comprehensive interpretation including:
1. SUMMARY OF FINDINGS
- What the analysis shows in plain language
- Key statistics and their meaning
2. STATISTICAL SIGNIFICANCE
- Which tests were significant/not significant
- What this means practically
3. ASSUMPTIONS CHECK
- Whether statistical assumptions were met
- Impact on interpretation if violated
4. PRACTICAL IMPLICATIONS
- What these results mean in real-world context
- Business or research implications
- Consider study context from information sheets if provided
5. LIMITATIONS AND CAVEATS
- Any limitations of the analysis
- Factors to consider when using results
6. RECOMMENDATIONS
- Next steps or additional analyses suggested
- How to use these findings
Keep the language accessible while maintaining statistical accuracy."""
return prompt
def query_llm(self, prompt: str, model: str = "gpt-4o", max_tokens: int = 1000) -> str:
"""Public method to query LLM - used by other components"""
return self._call_llm(prompt, model, max_tokens)
def _call_llm(self, prompt: str, model: str = "gpt-4o", max_tokens: int = 1000) -> str:
"""Call LLM with specified model"""
try:
if model.startswith('gpt-4o') or model.startswith('gpt-'):
# Use OpenAI
if self.openai_client:
response = self.openai_client.chat.completions.create(
model=model, # Use the specified model parameter instead of hardcoded "gpt-4o"
messages=[{"role": "user", "content": prompt}],
max_tokens=max_tokens,
temperature=0.1
)
return response.choices[0].message.content
else:
raise Exception("OpenAI client not available")
elif model.startswith('claude'):
# Use Anthropic Claude (requires anthropic library)
try:
import anthropic
anthropic_key = self.config.ANTHROPIC_API_KEY
if not anthropic_key or anthropic_key == 'sk-ant-api03-your-key-here':
raise Exception("Anthropic API key not configured")
client = anthropic.Anthropic(api_key=anthropic_key)
response = client.messages.create(
model=model, # Use the specified model parameter instead of hardcoded model
max_tokens=max_tokens,
temperature=0.1,
messages=[{"role": "user", "content": prompt}]
)
return response.content[0].text
except ImportError:
raise Exception("Anthropic library not installed. Run: pip install anthropic")
elif model.startswith('gemini'):
# Use Gemini
if self.gemini_client:
response = self.gemini_client.generate_content(
prompt,
generation_config={'max_output_tokens': max_tokens, 'temperature': 0.1}
)
return response.text
else:
raise Exception("Gemini client not available")
else:
# Default to OpenAI if model not recognized
if self.openai_client:
response = self.openai_client.chat.completions.create(
model=model, # Use the specified model parameter
messages=[{"role": "user", "content": prompt}],
max_tokens=max_tokens,
temperature=0.1
)
return response.choices[0].message.content
else:
raise Exception(f"Unsupported model: {model}")
except Exception as e:
logger.error(f"LLM call failed for model {model}: {str(e)}")
raise
def _parse_analysis_plan(self, response: str) -> Dict[str, Any]:
"""Parse JSON analysis plan from LLM response"""
try:
# Extract JSON from response
json_match = re.search(r'\{.*\}', response, re.DOTALL)
if json_match:
json_str = json_match.group()
return json.loads(json_str)
else:
# Fallback parsing
return self._parse_analysis_plan_fallback(response)
except:
return self._parse_analysis_plan_fallback(response)
def _parse_analysis_plan_fallback(self, response: str) -> Dict[str, Any]:
"""Fallback parsing for analysis plan"""
return {
'analysis_type': 'descriptive',
'target_variables': [],
'grouping_variables': [],
'statistical_tests': ['descriptive_statistics'],
'assumptions_to_check': [],
'significance_level': 0.05,
'description': response,
'confidence': 0.5,
'warnings': ['Could not parse structured response']
}
def _parse_script_response(self, response: str) -> Tuple[str, str]:
"""Extract script and explanation from LLM response"""
# Look for code blocks
code_blocks = re.findall(r'```python\n(.*?)\n```', response, re.DOTALL)
if code_blocks:
script = code_blocks[0]
# Remove script from response to get explanation
explanation = re.sub(r'```python\n.*?\n```', '[SCRIPT]', response, flags=re.DOTALL)
else:
# If no code block found, treat entire response as script
script = response
explanation = "Generated analysis script"
# Clean up script: remove any embedded markdown artifacts
# Sometimes LLM includes ```python at start of the actual code
script = script.strip()
if script.startswith('```python'):
script = script[9:].strip() # Remove ```python
if script.endswith('```'):
script = script[:-3].strip() # Remove trailing ```
return script, explanation.strip()
def _parse_debug_response(self, response: str) -> Tuple[str, str]:
"""Parse debugging response"""
# Look for explanation
explanation_match = re.search(r'EXPLANATION:\s*(.*?)(?=FIXED SCRIPT:)', response, re.DOTALL)
explanation = explanation_match.group(1).strip() if explanation_match else "Script fixed"
# Look for fixed script
script_match = re.search(r'FIXED SCRIPT:\s*```python\n(.*?)\n```', response, re.DOTALL)
if script_match:
fixed_script = script_match.group(1)
else:
# Fallback: look for any code block
code_blocks = re.findall(r'```python\n(.*?)\n```', response, re.DOTALL)
fixed_script = code_blocks[0] if code_blocks else response
# Clean up script: remove any embedded markdown artifacts
fixed_script = fixed_script.strip()
if fixed_script.startswith('```python'):
fixed_script = fixed_script[9:].strip()
if fixed_script.endswith('```'):
fixed_script = fixed_script[:-3].strip()
return fixed_script, explanation
def _build_analysis_config(self, analysis_plan: Dict[str, Any]) -> AnalysisConfiguration:
"""Build analysis configuration from plan"""
analysis_type_map = {
'descriptive': AnalysisType.DESCRIPTIVE,
'hypothesis_test': AnalysisType.HYPOTHESIS_TEST,
'regression': AnalysisType.REGRESSION,
'correlation': AnalysisType.CORRELATION,
'anova': AnalysisType.ANOVA,
'control_chart': AnalysisType.CONTROL_CHART,
'custom': AnalysisType.CUSTOM
}
return AnalysisConfiguration(
analysis_type=analysis_type_map.get(analysis_plan.get('analysis_type', 'descriptive'), AnalysisType.DESCRIPTIVE),
target_variables=analysis_plan.get('target_variables', []),
grouping_variables=analysis_plan.get('grouping_variables', []),
significance_level=analysis_plan.get('significance_level', 0.05),
custom_parameters={
'statistical_tests': analysis_plan.get('statistical_tests', []),
'assumptions_to_check': analysis_plan.get('assumptions_to_check', []),
'description': analysis_plan.get('description', ''),
'warnings': analysis_plan.get('warnings', [])
}
)
def _extract_required_libraries(self, script: str) -> List[str]:
"""Extract required libraries from script"""
import_pattern = r'(?:import|from)\s+(\w+)'
imports = re.findall(import_pattern, script)
return list(set(imports))
def _estimate_runtime(self, script: str) -> str:
"""Estimate script runtime based on content"""
if 'for' in script or 'while' in script:
return "Medium (1-30 seconds)"
elif len(script.split('\n')) > 50:
return "Long (30+ seconds)"
else:
return "Fast (<1 second)"
def _extract_key_findings(self, interpretation: str) -> List[str]:
"""Extract key findings from interpretation"""
# Simple extraction - could be enhanced with NLP
lines = interpretation.split('\n')
findings = []
for line in lines:
if any(keyword in line.lower() for keyword in ['significant', 'correlation', 'difference', 'p-value', 'statistic']):
findings.append(line.strip())
return findings[:5] # Limit to top 5
def _ensure_script_completeness(self, script: str) -> str:
"""
Ensure script has all mandatory components regardless of LLM output.
This is a ROBUST solution to prevent missing analysis_results.json file.
Strategy:
1. Check if script has results dictionary initialization
2. Check if script saves to analysis_results.json
3. If missing, intelligently add required components
4. Always append safety net save code at the end
"""
import re
has_results_dict = bool(re.search(r'results\s*=\s*[{\[]', script))
has_json_save = 'analysis_results.json' in script
has_json_import = 'import json' in script
logger.info(f"Script completeness check: results_dict={has_results_dict}, json_save={has_json_save}, json_import={has_json_import}")
# Build the complete script with safety nets
script_parts = []
# 1. Ensure json import at the top if missing
if not has_json_import:
logger.warning("⚠️ Script missing 'import json' - adding it")
# Find the last import statement to insert after it
import_lines = []
other_lines = []
in_imports = True
for line in script.split('\n'):
if in_imports and (line.startswith('import ') or line.startswith('from ')):
import_lines.append(line)
else:
if line.strip() and not line.startswith('#'):
in_imports = False
other_lines.append(line)
# Add json import after other imports
import_lines.append('import json')
import_lines.append('import math')
script = '\n'.join(import_lines) + '\n' + '\n'.join(other_lines)
# 2. Ensure results dictionary is initialized
if not has_results_dict:
logger.warning("⚠️ Script missing 'results = {}' initialization - adding it")
# Add after imports, before main code
lines = script.split('\n')
insert_pos = 0
for i, line in enumerate(lines):
if not (line.startswith('import ') or line.startswith('from ') or
line.startswith('#') or line.strip() == ''):
insert_pos = i
break
lines.insert(insert_pos, '\n# Initialize results dictionary\nresults = {}\n')
script = '\n'.join(lines)
# 3. ALWAYS append the safety net JSON save code at the end
# This ensures that even if LLM added save code but it's buggy, we have a backup
logger.info("✅ Adding SAFETY NET: Guaranteed JSON save code at end of script")
safety_net = """
# ============================================================================
# SAFETY NET: Guaranteed JSON save (DO NOT REMOVE)
# ============================================================================
# This code ensures analysis_results.json is ALWAYS created, even if the
# script above forgot to save results or had an error in the save logic.
# Ensure results dictionary exists
if 'results' not in dir():
print("WARNING: 'results' dictionary not found - creating empty one")
results = {'warning': 'No results were generated by the analysis script'}
# Clean NaN and Inf values to ensure valid JSON
try:
import math
def clean_for_json(obj):
if isinstance(obj, dict):
return {k: clean_for_json(v) for k, v in obj.items()}
elif isinstance(obj, list):
return [clean_for_json(item) for item in obj]
elif isinstance(obj, float):
if math.isnan(obj) or math.isinf(obj):
return None
return obj
elif isinstance(obj, (pd.Series, pd.DataFrame)):
return clean_for_json(obj.to_dict())
return obj
results = clean_for_json(results)
except Exception as e:
print(f"WARNING: Error cleaning results: {e}")
# Continue anyway with uncleaned results
# Save to JSON file
try:
import json
with open('analysis_results.json', 'w') as f:
json.dump(results, f, indent=2, default=str)
print("✅ SUCCESS: Results saved to analysis_results.json")
except Exception as e:
print(f"❌ ERROR: Failed to save results to JSON: {e}")
# Create a minimal file so the system doesn't fail
try:
with open('analysis_results.json', 'w') as f:
json.dump({'error': str(e), 'partial_results': {}}, f, indent=2)
print("⚠️ Created minimal analysis_results.json with error info")
except:
print("❌ CRITICAL: Cannot create analysis_results.json file at all")
print("="*70)
print("Script execution complete")
print("="*70)
"""
script = script.rstrip() + safety_net
logger.info(f"✅ Script completeness ensured: {len(script)} characters, guaranteed to save JSON")
return script
def _extract_recommendations(self, interpretation: str) -> List[str]:
"""Extract recommendations from interpretation"""
# Look for recommendation sections
rec_section = re.search(r'(?:RECOMMENDATIONS?|NEXT STEPS?|SUGGESTIONS?).*?(?=\n\n|\Z)', interpretation, re.DOTALL | re.IGNORECASE)
if rec_section:
recommendations = re.findall(r'-\s*(.*)', rec_section.group())
return recommendations[:3] # Limit to top 3
return []
def _get_fallback_suggestions(self, user_query: str, available_columns: List[str]) -> Dict[str, Any]:
"""Provide fallback suggestions when LLM fails"""
return {
'suggested_analysis': 'descriptive',
'suggested_variables': available_columns[:5],
'suggested_tests': ['summary_statistics', 'normality_test'],
'note': 'Fallback suggestions - please review and adjust'
}
def _generate_basic_script(self, analysis_config: AnalysisConfiguration) -> str:
"""Generate basic fallback script"""
return """
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
# Initialize results
results = {
'summary_statistics': {},
'test_results': {},
'plots': [],
'interpretation': 'Basic descriptive analysis completed'
}
# Basic descriptive statistics
results['summary_statistics'] = df.describe().to_dict()
print("Analysis completed successfully")
"""
def _generate_basic_interpretation(self, results: Dict[str, Any]) -> str:
"""Generate basic interpretation when LLM fails"""
return f"""
Basic Analysis Results:
The analysis has been completed successfully. Key statistics have been calculated and are available in the results.
Summary:
- Data processed successfully
- Statistical computations completed
- Results available for review
Please review the numerical results and plots for detailed insights.
"""
def _build_requirements_prompt(self, analysis_config: AnalysisConfiguration,
data_summary: Dict[str, Any], user_query: str) -> str:
"""Build prompt for requirements.txt generation"""
return f"""
You are a Python environment expert generating a requirements.txt file for a statistical analysis project.
Analysis Configuration:
- Type: {analysis_config.analysis_type.value}
- Target Variables: {', '.join(analysis_config.target_variables)}
- Grouping Variables: {', '.join(analysis_config.grouping_variables or [])}
- User Query: {user_query}
Data Summary:
- Columns: {len(data_summary.get('columns', []))}
- Data Types: {data_summary.get('column_types', {})}
- Missing Values: {data_summary.get('missing_values', {})}
Generate a requirements.txt file that includes ALL necessary packages for this analysis.
Requirements:
1. Include essential data science packages (pandas, numpy, matplotlib, seaborn)
2. Include statistical packages (scipy, statsmodels)
3. Include any specialized packages needed for the analysis type
4. Use specific versions for stability (e.g., pandas>=1.5.0)
5. Be comprehensive but not excessive
Format your response as:
REQUIREMENTS:
[list of requirements, one per line]
EXPLANATION:
[brief explanation of why each major package is needed]
"""
def _parse_requirements_response(self, response: str) -> Tuple[str, str]:
"""Parse LLM response for requirements generation"""
try:
# Look for REQUIREMENTS: section
req_match = re.search(r'REQUIREMENTS:\s*\n(.*?)\n\nEXPLANATION:', response, re.DOTALL)
if req_match:
requirements = req_match.group(1).strip()
else:
# Fallback: look for anything that looks like requirements
lines = response.split('\n')
req_lines = [line.strip() for line in lines if '==' in line or '>=' in line or line.strip().endswith('.txt') == False]
requirements = '\n'.join(req_lines) if req_lines else self._get_default_requirements()
# Look for explanation
exp_match = re.search(r'EXPLANATION:\s*\n(.*?)(?:\n\n|\Z)', response, re.DOTALL)
explanation = exp_match.group(1).strip() if exp_match else "Requirements generated for statistical analysis"
return requirements, explanation
except Exception as e:
logger.error(f"Error parsing requirements response: {str(e)}")
return self._get_default_requirements(), "Default requirements used due to parsing error"
def _get_default_requirements(self) -> str:
"""Get default requirements for statistical analysis"""
return """pandas>=1.5.0
numpy>=1.21.0
matplotlib>=3.5.0
seaborn>=0.11.0
scipy>=1.9.0
statsmodels>=0.13.0
scikit-learn>=1.1.0
plotly>=5.0.0
jupyter>=1.0.0"""
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| config | - | - | positional |
Parameter Details
config: Configuration object passed to the constructor; it must expose the API keys and endpoints read in setup_llm_clients and _call_llm (OPENAI_API_KEY, AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT, GEMINI_API_KEY, ANTHROPIC_API_KEY).
Return Value
Instantiating the class returns a configured StatisticalAgent instance.
Class Interface
Methods
__init__(self, config)
Purpose: Initialize the agent with the given configuration and set up the LLM clients
Parameters:
config: Configuration object providing API keys and endpoints
Returns: None
setup_llm_clients(self)
Purpose: Initialize LLM clients following your existing pattern
Returns: None
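The clients are created at construction time from attributes on the supplied config object (the Anthropic client is created per call inside _call_llm). A minimal sketch, assuming StatisticalAgent has been imported and using placeholder keys; any falsy key skips that client:
```python
# Minimal sketch of a config object accepted by setup_llm_clients(); attribute names
# mirror those read in the source, and the values here are placeholders only.
from types import SimpleNamespace

config = SimpleNamespace(
    OPENAI_API_KEY="sk-...",            # enables the OpenAI client if the openai package is available
    AZURE_OPENAI_API_KEY="",            # falsy value -> Azure client is skipped
    AZURE_OPENAI_ENDPOINT="https://example.openai.azure.com/",
    GEMINI_API_KEY="",                  # falsy value -> Gemini client is skipped
    ANTHROPIC_API_KEY="",               # read later in _call_llm() for claude* models
)

agent = StatisticalAgent(config)        # __init__ calls setup_llm_clients()
```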
interpret_user_query(self, user_query, data_summary, available_columns, model) -> Dict[str, Any]
Purpose: Interpret natural language query and suggest analysis configuration
Parameters:
user_query: Type: str
data_summary: Type: Dict[str, Any]
available_columns: Type: List[str]
model: Type: str
Returns: Returns Dict[str, Any]
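A hedged usage sketch, assuming an `agent` built as in the constructor example above; the dataset and column names are invented, and the data_summary keys shown are the ones _build_interpretation_prompt() reads:
```python
# Hypothetical call: interpret a natural-language request against a small dataset.
data_summary = {
    "shape": (120, 3),
    "column_info": {"Farm": "object", "Egg Weight (g)": "float64", "Date": "object"},
}
result = agent.interpret_user_query(
    user_query="Is egg weight different between farms?",
    data_summary=data_summary,
    available_columns=["Farm", "Egg Weight (g)", "Date"],
    model="gpt-4o",
)
if result["success"]:
    plan = result["analysis_plan"]                # parsed JSON plan from the LLM
    analysis_config = result["suggested_config"]  # AnalysisConfiguration built from the plan
else:
    print(result["fallback_suggestions"])
```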
generate_analysis_script(self, analysis_config, data_summary, user_query, model, previous_context) -> Dict[str, Any]
Purpose: Generate Python script for statistical analysis with optional previous context
Parameters:
analysis_config: Type: AnalysisConfiguration
data_summary: Type: Dict[str, Any]
user_query: Type: str
model: Type: str
previous_context: Type: Dict[str, Any]
Returns: Returns Dict[str, Any]
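A hedged sketch of script generation, reusing `agent` and the `analysis_config` returned by interpret_user_query; the single-dataset data_summary keys match those consumed by _build_script_generation_prompt():
```python
# Hypothetical script generation for a single dataset.
data_summary = {
    "rows": 120,
    "columns": 3,
    "column_names": ["Farm", "Egg Weight (g)", "Date"],
    "dtypes": {"Farm": "object", "Egg Weight (g)": "float64", "Date": "object"},
    "numeric_columns": ["Egg Weight (g)"],
}
script_result = agent.generate_analysis_script(
    analysis_config=analysis_config,
    data_summary=data_summary,
    user_query="Compare egg weight between farms",
    model="gpt-4o",
    previous_context=None,  # or {'count': 1, 'type': 'selected', 'summary': '...', 'analyses': [...]}
)
if script_result["success"]:
    # The returned script is wrapped by _ensure_script_completeness(), so it always
    # ends by writing 'analysis_results.json'.
    with open("generated_analysis.py", "w") as f:
        f.write(script_result["script"])
```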
generate_requirements_txt(self, analysis_config, data_summary, user_query, model) -> Dict[str, Any]
Purpose: Generate requirements.txt for the analysis script
Parameters:
analysis_config: Type: AnalysisConfiguration
data_summary: Type: Dict[str, Any]
user_query: Type: str
model: Type: str
Returns: Returns Dict[str, Any]
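A short sketch under the same assumptions, writing the generated requirements to disk with a fallback when generation fails:
```python
# Hypothetical requirements generation, reusing the objects from the examples above.
req_result = agent.generate_requirements_txt(
    analysis_config, data_summary, user_query="Compare egg weight between farms"
)
requirements = req_result["requirements"] if req_result["success"] else req_result["fallback_requirements"]
with open("requirements.txt", "w") as f:
    f.write(requirements)
```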
debug_script_error(self, script, error_message, data_summary, iteration, model) -> Dict[str, Any]
Purpose: Debug script errors using LLM (agent-mode debugging)
Parameters:
script: Type: str
error_message: Type: str
data_summary: Type: Dict[str, Any]
iteration: Type: int
model: Type: str
Returns: Returns Dict[str, Any]
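A hedged sketch of the agent-mode retry loop; run_script() is a hypothetical executor (not part of this class) that returns a success flag and the captured error output:
```python
# Hypothetical debugging loop: ask the LLM to repair a failing script, up to three iterations.
script = script_result["script"]
for iteration in range(1, 4):
    ok, error_message = run_script(script)   # placeholder executor, not part of StatisticalAgent
    if ok:
        break
    debug_result = agent.debug_script_error(
        script=script,
        error_message=error_message,
        data_summary=data_summary,
        iteration=iteration,
    )
    if not debug_result["success"]:
        break
    script = debug_result["fixed_script"]    # reported confidence decreases with each iteration
```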
interpret_results(self, results, analysis_config, user_query, model, info_sheets, interpretation_template) -> Dict[str, Any]
Purpose: Generate interpretation and insights from analysis results
Parameters:
results: Type: Dict[str, Any]
analysis_config: Type: AnalysisConfiguration
user_query: Type: str
model: Type: str
info_sheets: Type: Dict[str, str]
interpretation_template: Type: str
Returns: Returns Dict[str, Any]
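A hedged sketch of the interpretation step, reading the analysis_results.json written by the executed script; the info_sheets content is invented for illustration:
```python
# Hypothetical interpretation call on the JSON results produced by the script run.
import json

with open("analysis_results.json") as f:
    results = json.load(f)

interp = agent.interpret_results(
    results=results,
    analysis_config=analysis_config,
    user_query="Compare egg weight between farms",
    info_sheets={"Study design": "Two farms, eggs sampled weekly for 12 weeks."},  # optional context
    interpretation_template=None,   # None -> default six-section instructions
)
if interp["success"]:
    print(interp["interpretation"])
    print(interp["key_findings"], interp["recommendations"])
```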
_build_interpretation_prompt(self, user_query, data_summary, available_columns) -> str
Purpose: Build prompt for query interpretation
Parameters:
user_query: Type: str
data_summary: Type: Dict[str, Any]
available_columns: Type: List[str]
Returns: Returns str
_build_script_generation_prompt(self, analysis_config, data_summary, user_query, previous_context) -> str
Purpose: Build prompt for Python script generation with optional previous context
Parameters:
analysis_config: Type: AnalysisConfiguration
data_summary: Type: Dict[str, Any]
user_query: Type: str
previous_context: Type: Dict[str, Any]
Returns: Returns str
_build_debugging_prompt(self, script, error_message, data_summary, iteration) -> str
Purpose: Build prompt for script debugging
Parameters:
script: Type: str
error_message: Type: str
data_summary: Type: Dict[str, Any]
iteration: Type: int
Returns: Returns str
_build_interpretation_results_prompt(self, results, analysis_config, user_query, info_sheets, interpretation_template) -> str
Purpose: Build prompt for results interpretation
Parameters:
results: Type: Dict[str, Any]
analysis_config: Type: AnalysisConfiguration
user_query: Type: str
info_sheets: Type: Dict[str, str]
interpretation_template: Type: str
Returns: Returns str
query_llm(self, prompt, model, max_tokens) -> str
Purpose: Public method to query LLM - used by other components
Parameters:
prompt: Type: str
model: Type: str
max_tokens: Type: int
Returns: Returns str
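A minimal sketch; model routing is by name prefix as implemented in _call_llm() ('gpt-*' uses OpenAI, 'claude*' uses Anthropic, 'gemini*' uses Gemini):
```python
# query_llm() is a thin public wrapper around _call_llm().
answer = agent.query_llm(
    "List three assumptions of a two-sample t-test.",
    model="gpt-4o",
    max_tokens=300,
)
```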
_call_llm(self, prompt, model, max_tokens) -> str
Purpose: Call LLM with specified model
Parameters:
prompt: Type: str
model: Type: str
max_tokens: Type: int
Returns: Returns str
_parse_analysis_plan(self, response) -> Dict[str, Any]
Purpose: Parse JSON analysis plan from LLM response
Parameters:
response: Type: str
Returns: Returns Dict[str, Any]
_parse_analysis_plan_fallback(self, response) -> Dict[str, Any]
Purpose: Fallback parsing for analysis plan
Parameters:
response: Type: str
Returns: Returns Dict[str, Any]
_parse_script_response(self, response) -> Tuple[str, str]
Purpose: Extract script and explanation from LLM response
Parameters:
response: Type: str
Returns: Returns Tuple[str, str]
_parse_debug_response(self, response) -> Tuple[str, str]
Purpose: Parse debugging response
Parameters:
response: Type: str
Returns: Returns Tuple[str, str]
_build_analysis_config(self, analysis_plan) -> AnalysisConfiguration
Purpose: Build analysis configuration from plan
Parameters:
analysis_plan: Type: Dict[str, Any]
Returns: Returns AnalysisConfiguration
_extract_required_libraries(self, script) -> List[str]
Purpose: Extract required libraries from script
Parameters:
script: Type: str
Returns: Returns List[str]
_estimate_runtime(self, script) -> str
Purpose: Estimate script runtime based on content
Parameters:
script: Type: str
Returns: Returns str
_extract_key_findings(self, interpretation) -> List[str]
Purpose: Extract key findings from interpretation
Parameters:
interpretation: Type: str
Returns: Returns List[str]
_ensure_script_completeness(self, script) -> str
Purpose: Ensure script has all mandatory components regardless of LLM output. This is a ROBUST solution to prevent missing analysis_results.json file. Strategy: 1. Check if script has results dictionary initialization 2. Check if script saves to analysis_results.json 3. If missing, intelligently add required components 4. Always append safety net save code at the end
Parameters:
script: Type: str
Returns: Returns str
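An illustrative sketch (it calls a private method, so it is suited to testing only): even a script without a save step is wrapped so that the JSON file will be produced:
```python
# The input script below is deliberately minimal and omits the JSON save step.
incomplete = "import pandas as pd\nresults = {'n_rows': 3}\nprint('done')"
wrapped = agent._ensure_script_completeness(incomplete)
assert "analysis_results.json" in wrapped   # the safety-net save code has been appended
```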
_extract_recommendations(self, interpretation) -> List[str]
Purpose: Extract recommendations from interpretation
Parameters:
interpretation: Type: str
Returns: Returns List[str]
_get_fallback_suggestions(self, user_query, available_columns) -> Dict[str, Any]
Purpose: Provide fallback suggestions when LLM fails
Parameters:
user_query: Type: str
available_columns: Type: List[str]
Returns: Returns Dict[str, Any]
_generate_basic_script(self, analysis_config) -> str
Purpose: Generate basic fallback script
Parameters:
analysis_config: Type: AnalysisConfiguration
Returns: Returns str
_generate_basic_interpretation(self, results) -> str
Purpose: Generate basic interpretation when LLM fails
Parameters:
results: Type: Dict[str, Any]
Returns: Returns str
_build_requirements_prompt(self, analysis_config, data_summary, user_query) -> str
Purpose: Build prompt for requirements.txt generation
Parameters:
analysis_config: Type: AnalysisConfiguration
data_summary: Type: Dict[str, Any]
user_query: Type: str
Returns: Returns str
_parse_requirements_response(self, response) -> Tuple[str, str]
Purpose: Parse LLM response for requirements generation
Parameters:
response: Type: str
Returns: Returns Tuple[str, str]
_get_default_requirements(self) -> str
Purpose: Get default requirements for statistical analysis
Returns: Returns str
Required Imports
import os
import json
import logging
import re
from typing import Any, Dict, List, Tuple
(openai, google.generativeai, and anthropic are imported conditionally at runtime, guarded by OPENAI_AVAILABLE / GEMINI_AVAILABLE and a try/except in _call_llm)
Usage Example
# Example usage:
# agent = StatisticalAgent(config)
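A fuller, hedged end-to-end sketch under the assumptions above; placeholder variables such as user_query, data_summary, and available_columns are built by the caller:
```python
# End-to-end workflow sketch: interpret -> generate script and requirements ->
# (execute externally) -> interpret results.
agent = StatisticalAgent(config)

plan = agent.interpret_user_query(user_query, data_summary, available_columns, model="gpt-4o")
if plan["success"]:
    analysis_config = plan["suggested_config"]
    script_out = agent.generate_analysis_script(analysis_config, data_summary, user_query)
    reqs = agent.generate_requirements_txt(analysis_config, data_summary, user_query)
    # ... run script_out['script'] in a virtual environment, optionally repairing failures
    # with debug_script_error(), then pass the resulting analysis_results.json to
    # interpret_results() to obtain the written interpretation.
```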
Similar Components
AI-powered semantic similarity - components with related functionality:
- class StatisticalAgent_v1 (98.5% similar)
- class StatisticalAgent_v2 (98.3% similar)
- function demo_statistical_agent (60.8% similar)
- class LLMClient_v2 (58.5% similar)
- class StatisticalAnalysisService (55.8% similar)