class StatisticalAgent_v2
LLM-powered statistical analysis agent
/tf/active/vicechatdev/smartstat/statistical_agent.py
Lines: 33 - 720
Complexity: moderate
Purpose
LLM-powered statistical analysis agent
Source Code
class StatisticalAgent:
"""LLM-powered statistical analysis agent"""
def __init__(self, config: Config):
self.config = config
self.setup_llm_clients()
def setup_llm_clients(self):
"""Initialize LLM clients following your existing pattern"""
# OpenAI client
if OPENAI_AVAILABLE and self.config.OPENAI_API_KEY:
self.openai_client = openai.OpenAI(api_key=self.config.OPENAI_API_KEY)
else:
self.openai_client = None
# Azure OpenAI client
if OPENAI_AVAILABLE and self.config.AZURE_OPENAI_API_KEY:
self.azure_client = openai.AzureOpenAI(
azure_endpoint=self.config.AZURE_OPENAI_ENDPOINT,
api_key=self.config.AZURE_OPENAI_API_KEY,
api_version="2024-02-15-preview"
)
else:
self.azure_client = None
# Gemini client
if GEMINI_AVAILABLE and self.config.GEMINI_API_KEY:
genai.configure(api_key=self.config.GEMINI_API_KEY)
self.gemini_client = genai.GenerativeModel('gemini-pro')
else:
self.gemini_client = None
def interpret_user_query(self, user_query: str, data_summary: Dict[str, Any],
available_columns: List[str], model: str = 'gpt-4o') -> Dict[str, Any]:
"""
Interpret natural language query and suggest analysis configuration
"""
prompt = self._build_interpretation_prompt(user_query, data_summary, available_columns)
try:
response = self._call_llm(prompt, model=model, max_tokens=1000)
# Parse the structured response
analysis_plan = self._parse_analysis_plan(response)
return {
'success': True,
'analysis_plan': analysis_plan,
'suggested_config': self._build_analysis_config(analysis_plan),
'interpretation': response,
'confidence': analysis_plan.get('confidence', 0.8)
}
except Exception as e:
logger.error(f"Error interpreting user query: {str(e)}")
return {
'success': False,
'error': str(e),
'fallback_suggestions': self._get_fallback_suggestions(user_query, available_columns)
}
def generate_analysis_script(self, analysis_config: AnalysisConfiguration,
data_summary: Dict[str, Any],
user_query: str,
model: str = 'gpt-4o',
previous_context: Dict[str, Any] = None) -> Dict[str, Any]:
"""
Generate Python script for statistical analysis with optional previous context
"""
prompt = self._build_script_generation_prompt(analysis_config, data_summary, user_query, previous_context)
try:
response = self._call_llm(prompt, model=model, max_tokens=2000)
# Extract script and explanation
script, explanation = self._parse_script_response(response)
return {
'success': True,
'script': script,
'explanation': explanation,
'estimated_runtime': self._estimate_runtime(script),
'required_libraries': self._extract_required_libraries(script)
}
except Exception as e:
logger.error(f"Error generating analysis script: {str(e)}")
return {
'success': False,
'error': str(e),
'fallback_script': self._generate_basic_script(analysis_config)
}
def generate_requirements_txt(self, analysis_config: AnalysisConfiguration,
data_summary: Dict[str, Any],
user_query: str, model: str = 'gpt-4o') -> Dict[str, Any]:
"""
Generate requirements.txt for the analysis script
"""
prompt = self._build_requirements_prompt(analysis_config, data_summary, user_query)
try:
response = self._call_llm(prompt, model=model, max_tokens=800)
# Extract requirements and explanation
requirements, explanation = self._parse_requirements_response(response)
return {
'success': True,
'requirements': requirements,
'explanation': explanation
}
except Exception as e:
logger.error(f"Error generating requirements.txt: {str(e)}")
return {
'success': False,
'error': str(e),
'fallback_requirements': self._get_default_requirements()
}
def debug_script_error(self, script: str, error_message: str,
data_summary: Dict[str, Any], iteration: int = 1, model: str = 'gpt-4o') -> Dict[str, Any]:
"""
Debug script errors using LLM (agent-mode debugging)
"""
prompt = self._build_debugging_prompt(script, error_message, data_summary, iteration)
try:
response = self._call_llm(prompt, model=model, max_tokens=1500)
# Parse debugging response
fixed_script, explanation = self._parse_debug_response(response)
return {
'success': True,
'fixed_script': fixed_script,
'explanation': explanation,
'debug_iteration': iteration,
'confidence': 0.9 - (iteration * 0.1) # Decreasing confidence with iterations
}
except Exception as e:
logger.error(f"Error debugging script: {str(e)}")
return {
'success': False,
'error': str(e),
'iteration': iteration
}
def interpret_results(self, results: Dict[str, Any], analysis_config: AnalysisConfiguration,
user_query: str, model: str = 'gpt-4o') -> Dict[str, Any]:
"""
Generate interpretation and insights from analysis results
"""
prompt = self._build_interpretation_results_prompt(results, analysis_config, user_query)
try:
response = self._call_llm(prompt, model=model, max_tokens=1500)
return {
'success': True,
'interpretation': response,
'key_findings': self._extract_key_findings(response),
'recommendations': self._extract_recommendations(response)
}
except Exception as e:
logger.error(f"Error interpreting results: {str(e)}")
return {
'success': False,
'error': str(e),
'basic_interpretation': self._generate_basic_interpretation(results)
}
def _build_interpretation_prompt(self, user_query: str, data_summary: Dict[str, Any],
available_columns: List[str]) -> str:
"""Build prompt for query interpretation"""
return f"""You are a statistical analysis expert. Analyze the user's request and suggest the most appropriate statistical analysis.
USER QUERY: "{user_query}"
DATASET INFORMATION:
- Shape: {data_summary.get('shape', 'Unknown')}
- Available columns: {', '.join(available_columns)}
- Column details: {json.dumps(data_summary.get('column_info', {}), indent=2)}
TASK: Interpret the user's request and provide a structured analysis plan in JSON format:
{{
"analysis_type": "descriptive|hypothesis_test|regression|correlation|anova|control_chart|custom",
"target_variables": ["list", "of", "target", "variables"],
"grouping_variables": ["list", "of", "grouping", "variables"],
"statistical_tests": ["list", "of", "specific", "tests"],
"assumptions_to_check": ["normality", "homogeneity", "independence", "etc"],
"significance_level": 0.05,
"description": "Detailed description of the analysis plan",
"confidence": 0.9,
"warnings": ["any", "potential", "issues"]
}}
Focus on:
1. Descriptive statistics and hypothesis testing
2. Control chart analysis for quality data
3. Checking statistical assumptions
4. Appropriate test selection based on data types and distribution
Provide only the JSON response."""
def _build_script_generation_prompt(self, analysis_config: AnalysisConfiguration,
data_summary: Dict[str, Any], user_query: str,
previous_context: Dict[str, Any] = None) -> str:
"""Build prompt for Python script generation with optional previous context"""
# Build the base prompt
prompt_parts = []
prompt_parts.append("You are a Python statistical programming expert. Generate a complete Python script for statistical analysis.")
# Add previous context if available
if previous_context and previous_context.get('count', 0) > 0:
context_type = previous_context.get('type', 'previous')
prompt_parts.append(f"\nPREVIOUS ANALYSIS CONTEXT:")
if context_type == 'selected':
prompt_parts.append(f"This is a follow-up analysis. The user has specifically selected {previous_context['count']} previous analyses to reference:")
else:
prompt_parts.append(f"This is a follow-up analysis in an iterative session. Here's what was done previously:")
prompt_parts.append(previous_context['summary'])
# Include detailed context from the most recent previous analysis
if previous_context['analyses']:
recent_analysis = previous_context['analyses'][-1] # Most recent
prompt_parts.append(f"\nMOST RECENT ANALYSIS DETAILS:")
prompt_parts.append(f"Query: {recent_analysis.get('user_query', 'Unknown')}")
if recent_analysis.get('conclusions'):
prompt_parts.append(f"Previous conclusions: {recent_analysis['conclusions'][:500]}...")
if recent_analysis.get('results_summary'):
prompt_parts.append(f"Previous results files generated:")
for result in recent_analysis['results_summary'][:3]: # Limit to first 3 files
prompt_parts.append(f"- {result['filename']}: {result['preview'][:200]}...")
if recent_analysis.get('script_content'):
# Include key parts of previous script for reference
script_lines = recent_analysis['script_content'].split('\n')
imports_and_key_code = []
for line in script_lines[:50]: # First 50 lines typically contain imports and key setup
if line.strip().startswith(('import ', 'from ', 'def ', '# Key')) or 'matplotlib' in line or 'seaborn' in line:
imports_and_key_code.append(line)
if imports_and_key_code:
prompt_parts.append(f"\nPrevious script key components (for reference):")
prompt_parts.append('\n'.join(imports_and_key_code[:20])) # Limit lines
prompt_parts.append(f"\nIMPORTANT: Build upon previous work. Reference previous findings. Avoid repeating exactly the same analysis unless specifically requested.")
prompt_parts.append(f"\nCURRENT REQUEST: \"{user_query}\"")
return f"""{chr(10).join(prompt_parts)}
ANALYSIS CONFIGURATION:
- Type: {analysis_config.analysis_type.value}
- Target variables: {analysis_config.target_variables}
- Grouping variables: {analysis_config.grouping_variables}
- Significance level: {analysis_config.significance_level}
DATASET INFO:
- Shape: {data_summary.get('shape', 'Unknown')}
- Column types: {json.dumps(data_summary.get('column_info', {}), indent=2)}
REQUIREMENTS:
1. Assume data is loaded in a DataFrame called 'df' and 'data' (alias)
2. DO NOT include import statements - modules are pre-loaded: pd, np, plt, sns, stats, sm, warnings
3. Include proper error handling and data validation
4. Generate clear plots with proper titles and labels
5. Store results in a dictionary called 'results' (already initialized)
6. Include statistical assumptions checking
7. Add comments explaining each step
8. Use plt.show() to display plots (they will be auto-saved)
EXECUTION ENVIRONMENT:
- Modules available without import: pd, np, plt, sns, stats, sm, warnings
- Data available as: df (main DataFrame), data (alias)
- Results dictionary: results (pre-initialized)
- Plot directory: plots_dir (for saving)
- Use plt.show() to display plots (they will be auto-saved)
- DO NOT call save_plot() directly - use plt.show() instead
SCRIPT STRUCTURE:
```python
# No imports needed - modules are pre-loaded
# Data available as 'df' and 'data'
# Results dictionary already initialized
# Your analysis code here...
# Add results to the pre-initialized results dictionary
results['summary_statistics'] = {{}}
results['test_results'] = {{}}
results['interpretation'] = "Brief interpretation of findings"
# Use plt.show() to display plots (auto-saved)
```
Provide the complete Python script WITHOUT import statements and with detailed comments and error handling."""
def _build_debugging_prompt(self, script: str, error_message: str,
data_summary: Dict[str, Any], iteration: int) -> str:
"""Build prompt for script debugging"""
return f"""You are a Python debugging expert. Fix the error in this statistical analysis script.
ORIGINAL SCRIPT:
```python
{script}
```
ERROR MESSAGE:
{error_message}
DATASET INFO:
- Shape: {data_summary.get('shape', 'Unknown')}
- Available columns: {list(data_summary.get('column_info', {}).keys())}
DEBUG ITERATION: {iteration}
REQUIREMENTS:
1. Identify the root cause of the error
2. Provide a fixed version of the script
3. Explain what was wrong and how you fixed it
4. Ensure the fix maintains the original analysis intent
5. Add additional error handling if needed
RESPONSE FORMAT:
EXPLANATION: [Explain the error and your fix]
FIXED SCRIPT:
```python
[Your fixed Python code here]
```
Focus on common issues:
- Column name mismatches
- Data type problems
- Missing data handling
- Library import errors
- Statistical test requirements"""
def _build_interpretation_results_prompt(self, results: Dict[str, Any],
analysis_config: AnalysisConfiguration,
user_query: str) -> str:
"""Build prompt for results interpretation"""
return f"""You are a statistical analyst. Interpret these analysis results for a non-technical audience.
USER QUERY: "{user_query}"
ANALYSIS TYPE: {analysis_config.analysis_type.value}
SIGNIFICANCE LEVEL: {analysis_config.significance_level}
RESULTS:
{json.dumps(results, indent=2, default=str)}
TASK: Provide a clear, comprehensive interpretation including:
1. SUMMARY OF FINDINGS
- What the analysis shows in plain language
- Key statistics and their meaning
2. STATISTICAL SIGNIFICANCE
- Which tests were significant/not significant
- What this means practically
3. ASSUMPTIONS CHECK
- Whether statistical assumptions were met
- Impact on interpretation if violated
4. PRACTICAL IMPLICATIONS
- What these results mean in real-world context
- Business or research implications
5. LIMITATIONS AND CAVEATS
- Any limitations of the analysis
- Factors to consider when using results
6. RECOMMENDATIONS
- Next steps or additional analyses suggested
- How to use these findings
Keep the language accessible while maintaining statistical accuracy."""
def _call_llm(self, prompt: str, model: str = "gpt-4o", max_tokens: int = 1000) -> str:
"""Call LLM with specified model"""
try:
if model.startswith('gpt-4o') or model.startswith('gpt-'):
# Use OpenAI
if self.openai_client:
response = self.openai_client.chat.completions.create(
model="gpt-4o", # Use gpt-4o for all GPT variants
messages=[{"role": "user", "content": prompt}],
max_tokens=max_tokens,
temperature=0.1
)
return response.choices[0].message.content
else:
raise Exception("OpenAI client not available")
elif model.startswith('claude'):
# Use Anthropic Claude (requires anthropic library)
try:
import anthropic
anthropic_key = self.config.ANTHROPIC_API_KEY
if not anthropic_key or anthropic_key == 'sk-ant-api03-your-key-here':
raise Exception("Anthropic API key not configured")
client = anthropic.Anthropic(api_key=anthropic_key)
response = client.messages.create(
model="claude-sonnet-4-5-20250929",
max_tokens=max_tokens,
temperature=0.1,
messages=[{"role": "user", "content": prompt}]
)
return response.content[0].text
except ImportError:
raise Exception("Anthropic library not installed. Run: pip install anthropic")
elif model.startswith('gemini'):
# Use Gemini
if self.gemini_client:
response = self.gemini_client.generate_content(
prompt,
generation_config={'max_output_tokens': max_tokens, 'temperature': 0.1}
)
return response.text
else:
raise Exception("Gemini client not available")
else:
# Default to OpenAI if model not recognized
if self.openai_client:
response = self.openai_client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": prompt}],
max_tokens=max_tokens,
temperature=0.1
)
return response.choices[0].message.content
else:
raise Exception(f"Unsupported model: {model}")
except Exception as e:
logger.error(f"LLM call failed for model {model}: {str(e)}")
raise
def _parse_analysis_plan(self, response: str) -> Dict[str, Any]:
"""Parse JSON analysis plan from LLM response"""
try:
# Extract JSON from response
json_match = re.search(r'\{.*\}', response, re.DOTALL)
if json_match:
json_str = json_match.group()
return json.loads(json_str)
else:
# Fallback parsing
return self._parse_analysis_plan_fallback(response)
except:
return self._parse_analysis_plan_fallback(response)
def _parse_analysis_plan_fallback(self, response: str) -> Dict[str, Any]:
"""Fallback parsing for analysis plan"""
return {
'analysis_type': 'descriptive',
'target_variables': [],
'grouping_variables': [],
'statistical_tests': ['descriptive_statistics'],
'assumptions_to_check': [],
'significance_level': 0.05,
'description': response,
'confidence': 0.5,
'warnings': ['Could not parse structured response']
}
def _parse_script_response(self, response: str) -> Tuple[str, str]:
"""Extract script and explanation from LLM response"""
# Look for code blocks
code_blocks = re.findall(r'```python\n(.*?)\n```', response, re.DOTALL)
if code_blocks:
script = code_blocks[0]
# Remove script from response to get explanation
explanation = re.sub(r'```python\n.*?\n```', '[SCRIPT]', response, flags=re.DOTALL)
else:
# If no code block found, treat entire response as script
script = response
explanation = "Generated analysis script"
return script.strip(), explanation.strip()
def _parse_debug_response(self, response: str) -> Tuple[str, str]:
"""Parse debugging response"""
# Look for explanation
explanation_match = re.search(r'EXPLANATION:\s*(.*?)(?=FIXED SCRIPT:)', response, re.DOTALL)
explanation = explanation_match.group(1).strip() if explanation_match else "Script fixed"
# Look for fixed script
script_match = re.search(r'FIXED SCRIPT:\s*```python\n(.*?)\n```', response, re.DOTALL)
if script_match:
fixed_script = script_match.group(1)
else:
# Fallback: look for any code block
code_blocks = re.findall(r'```python\n(.*?)\n```', response, re.DOTALL)
fixed_script = code_blocks[0] if code_blocks else response
return fixed_script.strip(), explanation
def _build_analysis_config(self, analysis_plan: Dict[str, Any]) -> AnalysisConfiguration:
"""Build analysis configuration from plan"""
analysis_type_map = {
'descriptive': AnalysisType.DESCRIPTIVE,
'hypothesis_test': AnalysisType.HYPOTHESIS_TEST,
'regression': AnalysisType.REGRESSION,
'correlation': AnalysisType.CORRELATION,
'anova': AnalysisType.ANOVA,
'control_chart': AnalysisType.CONTROL_CHART,
'custom': AnalysisType.CUSTOM
}
return AnalysisConfiguration(
analysis_type=analysis_type_map.get(analysis_plan.get('analysis_type', 'descriptive'), AnalysisType.DESCRIPTIVE),
target_variables=analysis_plan.get('target_variables', []),
grouping_variables=analysis_plan.get('grouping_variables', []),
significance_level=analysis_plan.get('significance_level', 0.05),
custom_parameters={
'statistical_tests': analysis_plan.get('statistical_tests', []),
'assumptions_to_check': analysis_plan.get('assumptions_to_check', []),
'description': analysis_plan.get('description', ''),
'warnings': analysis_plan.get('warnings', [])
}
)
def _extract_required_libraries(self, script: str) -> List[str]:
"""Extract required libraries from script"""
import_pattern = r'(?:import|from)\s+(\w+)'
imports = re.findall(import_pattern, script)
return list(set(imports))
def _estimate_runtime(self, script: str) -> str:
"""Estimate script runtime based on content"""
if 'for' in script or 'while' in script:
return "Medium (1-30 seconds)"
elif len(script.split('\n')) > 50:
return "Long (30+ seconds)"
else:
return "Fast (<1 second)"
def _extract_key_findings(self, interpretation: str) -> List[str]:
"""Extract key findings from interpretation"""
# Simple extraction - could be enhanced with NLP
lines = interpretation.split('\n')
findings = []
for line in lines:
if any(keyword in line.lower() for keyword in ['significant', 'correlation', 'difference', 'p-value', 'statistic']):
findings.append(line.strip())
return findings[:5] # Limit to top 5
def _extract_recommendations(self, interpretation: str) -> List[str]:
"""Extract recommendations from interpretation"""
# Look for recommendation sections
rec_section = re.search(r'(?:RECOMMENDATIONS?|NEXT STEPS?|SUGGESTIONS?).*?(?=\n\n|\Z)', interpretation, re.DOTALL | re.IGNORECASE)
if rec_section:
recommendations = re.findall(r'-\s*(.*)', rec_section.group())
return recommendations[:3] # Limit to top 3
return []
def _get_fallback_suggestions(self, user_query: str, available_columns: List[str]) -> Dict[str, Any]:
"""Provide fallback suggestions when LLM fails"""
return {
'suggested_analysis': 'descriptive',
'suggested_variables': available_columns[:5],
'suggested_tests': ['summary_statistics', 'normality_test'],
'note': 'Fallback suggestions - please review and adjust'
}
def _generate_basic_script(self, analysis_config: AnalysisConfiguration) -> str:
"""Generate basic fallback script"""
return """
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
# Initialize results
results = {
'summary_statistics': {},
'test_results': {},
'plots': [],
'interpretation': 'Basic descriptive analysis completed'
}
# Basic descriptive statistics
results['summary_statistics'] = df.describe().to_dict()
print("Analysis completed successfully")
"""
def _generate_basic_interpretation(self, results: Dict[str, Any]) -> str:
"""Generate basic interpretation when LLM fails"""
return f"""
Basic Analysis Results:
The analysis has been completed successfully. Key statistics have been calculated and are available in the results.
Summary:
- Data processed successfully
- Statistical computations completed
- Results available for review
Please review the numerical results and plots for detailed insights.
"""
def _build_requirements_prompt(self, analysis_config: AnalysisConfiguration,
data_summary: Dict[str, Any], user_query: str) -> str:
"""Build prompt for requirements.txt generation"""
return f"""
You are a Python environment expert generating a requirements.txt file for a statistical analysis project.
Analysis Configuration:
- Type: {analysis_config.analysis_type.value}
- Target Variables: {', '.join(analysis_config.target_variables)}
- Grouping Variables: {', '.join(analysis_config.grouping_variables or [])}
- User Query: {user_query}
Data Summary:
- Columns: {len(data_summary.get('columns', []))}
- Data Types: {data_summary.get('column_types', {})}
- Missing Values: {data_summary.get('missing_values', {})}
Generate a requirements.txt file that includes ALL necessary packages for this analysis.
Requirements:
1. Include essential data science packages (pandas, numpy, matplotlib, seaborn)
2. Include statistical packages (scipy, statsmodels)
3. Include any specialized packages needed for the analysis type
4. Use specific versions for stability (e.g., pandas>=1.5.0)
5. Be comprehensive but not excessive
Format your response as:
REQUIREMENTS:
[list of requirements, one per line]
EXPLANATION:
[brief explanation of why each major package is needed]
"""
def _parse_requirements_response(self, response: str) -> Tuple[str, str]:
"""Parse LLM response for requirements generation"""
try:
# Look for REQUIREMENTS: section
req_match = re.search(r'REQUIREMENTS:\s*\n(.*?)\n\nEXPLANATION:', response, re.DOTALL)
if req_match:
requirements = req_match.group(1).strip()
else:
# Fallback: look for anything that looks like requirements
lines = response.split('\n')
req_lines = [line.strip() for line in lines if '==' in line or '>=' in line or line.strip().endswith('.txt') == False]
requirements = '\n'.join(req_lines) if req_lines else self._get_default_requirements()
# Look for explanation
exp_match = re.search(r'EXPLANATION:\s*\n(.*?)(?:\n\n|\Z)', response, re.DOTALL)
explanation = exp_match.group(1).strip() if exp_match else "Requirements generated for statistical analysis"
return requirements, explanation
except Exception as e:
logger.error(f"Error parsing requirements response: {str(e)}")
return self._get_default_requirements(), "Default requirements used due to parsing error"
def _get_default_requirements(self) -> str:
"""Get default requirements for statistical analysis"""
return """pandas>=1.5.0
numpy>=1.21.0
matplotlib>=3.5.0
seaborn>=0.11.0
scipy>=1.9.0
statsmodels>=0.13.0
scikit-learn>=1.1.0
plotly>=5.0.0
jupyter>=1.0.0"""
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| config | Config | - | positional |
Parameter Details
config: Application Config object supplying the API keys and endpoints (OpenAI, Azure OpenAI, Gemini, Anthropic) read by setup_llm_clients and _call_llm.
Return Value
The constructor returns None; instantiation yields a StatisticalAgent with its LLM clients initialized.
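A minimal construction sketch; the import paths and the Config() construction are assumptions inferred from the file path above, not taken from this file:
```python
# Sketch only: import paths and Config() construction are assumptions.
# The agent only needs a Config exposing the attributes read in setup_llm_clients().
from statistical_agent import StatisticalAgent   # assumed import for smartstat/statistical_agent.py
from config import Config                         # assumed companion module

config = Config()                                 # assumed: loads OPENAI_API_KEY, GEMINI_API_KEY, etc.
agent = StatisticalAgent(config)                  # sets up whichever LLM clients have keys configured
```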
Class Interface
Methods
__init__(self, config)
Purpose: Initialize the agent with its configuration and set up the LLM clients
Parameters:
config: Type: Config
Returns: None
setup_llm_clients(self)
Purpose: Initialize LLM clients following your existing pattern
Returns: None
interpret_user_query(self, user_query, data_summary, available_columns, model) -> Dict[str, Any]
Purpose: Interpret natural language query and suggest analysis configuration
Parameters:
user_query: Type: str
data_summary: Type: Dict[str, Any]
available_columns: Type: List[str]
model: Type: str
Returns: Dict[str, Any]
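A hedged call sketch, assuming an `agent` constructed as in the Parameters section above; the `data_summary` shape mirrors the keys the interpretation prompt reads (`shape`, `column_info`), and the sample values are illustrative:
```python
# Sketch: data_summary keys mirror what _build_interpretation_prompt reads.
data_summary = {
    "shape": (250, 4),
    "column_info": {"yield": "float64", "line": "object", "batch": "int64", "temp": "float64"},
}
result = agent.interpret_user_query(
    "Do lines A and B differ in mean yield?",
    data_summary,
    available_columns=["yield", "line", "batch", "temp"],
    model="gpt-4o",
)
if result["success"]:
    plan = result["analysis_plan"]                   # parsed JSON plan from the LLM
    config_suggestion = result["suggested_config"]   # AnalysisConfiguration instance
else:
    fallback = result["fallback_suggestions"]        # conservative defaults when the LLM call fails
```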
generate_analysis_script(self, analysis_config, data_summary, user_query, model, previous_context) -> Dict[str, Any]
Purpose: Generate Python script for statistical analysis with optional previous context
Parameters:
analysis_config: Type: AnalysisConfiguration
data_summary: Type: Dict[str, Any]
user_query: Type: str
model: Type: str
previous_context: Type: Dict[str, Any]
Returns: Dict[str, Any]
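A follow-on sketch feeding the suggested configuration into script generation; `config_suggestion` and `data_summary` carry over from the previous sketch, and the `previous_context` key names (`count`, `type`, `summary`, `analyses`) are the ones the prompt builder inspects:
```python
# Sketch: reuse the configuration suggested by interpret_user_query.
generated = agent.generate_analysis_script(
    analysis_config=config_suggestion,
    data_summary=data_summary,
    user_query="Do lines A and B differ in mean yield?",
    model="gpt-4o",
    previous_context=None,        # or a dict with 'count', 'type', 'summary', 'analyses'
)
if generated["success"]:
    script = generated["script"]                    # Python code without imports (pre-loaded environment)
    libraries = generated["required_libraries"]     # parsed from the script text
    runtime_hint = generated["estimated_runtime"]
```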
generate_requirements_txt(self, analysis_config, data_summary, user_query, model) -> Dict[str, Any]
Purpose: Generate requirements.txt for the analysis script
Parameters:
analysis_config: Type: AnalysisConfiguration
data_summary: Type: Dict[str, Any]
user_query: Type: str
model: Type: str
Returns: Dict[str, Any]
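Requirements generation follows the same call pattern; a brief sketch using the same assumed objects:
```python
# Sketch: produce a requirements.txt for the environment that will run the script.
reqs = agent.generate_requirements_txt(config_suggestion, data_summary,
                                       "Compare yield by line", model="gpt-4o")
requirements_txt = reqs["requirements"] if reqs["success"] else reqs["fallback_requirements"]
```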
debug_script_error(self, script, error_message, data_summary, iteration, model) -> Dict[str, Any]
Purpose: Debug script errors using LLM (agent-mode debugging)
Parameters:
script: Type: str
error_message: Type: str
data_summary: Type: Dict[str, Any]
iteration: Type: int
model: Type: str
Returns: Dict[str, Any]
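Script execution happens outside this class, so the retry loop below is a hypothetical sketch; `run_script` is an assumed external executor returning a success flag and an error message, and only `debug_script_error` is taken from the source:
```python
# Hypothetical agent-mode retry loop: run_script() is an assumed helper, not part of StatisticalAgent.
for iteration in range(1, 4):
    ok, error_message = run_script(script)          # assumed: executes the script with df/results pre-loaded
    if ok:
        break
    debug = agent.debug_script_error(script, error_message, data_summary,
                                     iteration=iteration, model="gpt-4o")
    if not debug["success"]:
        break
    script = debug["fixed_script"]                  # reported confidence decreases with each iteration
```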
interpret_results(self, results, analysis_config, user_query, model) -> Dict[str, Any]
Purpose: Generate interpretation and insights from analysis results
Parameters:
results: Type: Dict[str, Any]
analysis_config: Type: AnalysisConfiguration
user_query: Type: str
model: Type: str
Returns: Dict[str, Any]
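Once the executed script has populated a `results` dictionary, interpretation is a single call; a sketch with hand-written results and the `config_suggestion` carried over from earlier:
```python
# Sketch: results is the dictionary populated by the executed analysis script.
results = {"summary_statistics": {}, "test_results": {"t_test": {"p_value": 0.03}}}
summary = agent.interpret_results(results, config_suggestion,
                                  "Do lines A and B differ in mean yield?")
if summary["success"]:
    narrative = summary["interpretation"]
    findings = summary["key_findings"]        # lines mentioning significance, p-values, etc.
    next_steps = summary["recommendations"]
```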
_build_interpretation_prompt(self, user_query, data_summary, available_columns) -> str
Purpose: Build prompt for query interpretation
Parameters:
user_query: Type: str
data_summary: Type: Dict[str, Any]
available_columns: Type: List[str]
Returns: str
_build_script_generation_prompt(self, analysis_config, data_summary, user_query, previous_context) -> str
Purpose: Build prompt for Python script generation with optional previous context
Parameters:
analysis_config: Type: AnalysisConfiguration
data_summary: Type: Dict[str, Any]
user_query: Type: str
previous_context: Type: Dict[str, Any]
Returns: str
_build_debugging_prompt(self, script, error_message, data_summary, iteration) -> str
Purpose: Build prompt for script debugging
Parameters:
script: Type: str
error_message: Type: str
data_summary: Type: Dict[str, Any]
iteration: Type: int
Returns: str
_build_interpretation_results_prompt(self, results, analysis_config, user_query) -> str
Purpose: Build prompt for results interpretation
Parameters:
results: Type: Dict[str, Any]
analysis_config: Type: AnalysisConfiguration
user_query: Type: str
Returns: str
_call_llm(self, prompt, model, max_tokens) -> str
Purpose: Call LLM with specified model
Parameters:
prompt: Type: str
model: Type: str
max_tokens: Type: int
Returns: str
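Routing is prefix-based: `gpt-*` models go to the OpenAI client (always requested as `gpt-4o`), `claude*` lazily imports the anthropic SDK, `gemini*` uses the configured Gemini client, and unrecognized names fall back to OpenAI. A minimal sketch (private method, shown only to illustrate routing; it requires the corresponding client to be configured):
```python
# Sketch: prefix-based routing; all providers are called with temperature 0.1.
text = agent._call_llm("Summarize the assumptions of a two-sample t-test.",
                       model="gemini-pro", max_tokens=300)   # raises if the Gemini client is not configured
```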
_parse_analysis_plan(self, response) -> Dict[str, Any]
Purpose: Parse JSON analysis plan from LLM response
Parameters:
response: Type: str
Returns: Dict[str, Any]
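Plan parsing extracts the first `{...}` block with a DOTALL regex and falls back to a conservative descriptive plan when no JSON can be parsed; a sketch with hand-written responses:
```python
# Sketch: feed hand-written LLM-style responses through the parser.
raw = 'Here is the plan: {"analysis_type": "hypothesis_test", "target_variables": ["yield"], "confidence": 0.9}'
plan = agent._parse_analysis_plan(raw)                    # dict parsed from the embedded JSON block
fallback = agent._parse_analysis_plan("no json here")     # descriptive fallback plan with a warning
```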
_parse_analysis_plan_fallback(self, response) -> Dict[str, Any]
Purpose: Fallback parsing for analysis plan
Parameters:
response: Type: str
Returns: Dict[str, Any]
_parse_script_response(self, response) -> Tuple[str, str]
Purpose: Extract script and explanation from LLM response
Parameters:
response: Type: str
Returns: Tuple[str, str]
_parse_debug_response(self, response) -> Tuple[str, str]
Purpose: Parse debugging response
Parameters:
response: Type: str
Returns: Tuple[str, str]
_build_analysis_config(self, analysis_plan) -> AnalysisConfiguration
Purpose: Build analysis configuration from plan
Parameters:
analysis_plan: Type: Dict[str, Any]
Returns: AnalysisConfiguration
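The plan dictionary is mapped onto an AnalysisConfiguration through a string-to-enum lookup, with unrecognized types defaulting to descriptive; a sketch:
```python
# Sketch: unknown analysis_type strings fall back to AnalysisType.DESCRIPTIVE.
cfg = agent._build_analysis_config({"analysis_type": "anova",
                                    "target_variables": ["yield"],
                                    "grouping_variables": ["line"]})
# cfg.analysis_type is AnalysisType.ANOVA; the plan's tests, assumptions, description,
# and warnings are passed through as custom_parameters.
```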
_extract_required_libraries(self, script) -> List[str]
Purpose: Extract required libraries from script
Parameters:
script: Type: str
Returns: List[str]
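Library extraction is a simple regex over `import`/`from` statements; since generated scripts are asked to omit imports, this mostly matters for the fallback script. A sketch:
```python
# Sketch: the regex captures the first module name after 'import' or 'from'.
libs = agent._extract_required_libraries("import numpy as np\nimport pandas as pd\n")
# libs is an unordered, de-duplicated list containing 'numpy' and 'pandas'
```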
_estimate_runtime(self, script) -> str
Purpose: Estimate script runtime based on content
Parameters:
script: Type: str
Returns: str
_extract_key_findings(self, interpretation) -> List[str]
Purpose: Extract key findings from interpretation
Parameters:
interpretation: Type: str
Returns: List[str]
_extract_recommendations(self, interpretation) -> List[str]
Purpose: Extract recommendations from interpretation
Parameters:
interpretation: Type: str
Returns: List[str]
_get_fallback_suggestions(self, user_query, available_columns) -> Dict[str, Any]
Purpose: Provide fallback suggestions when LLM fails
Parameters:
user_query: Type: str
available_columns: Type: List[str]
Returns: Dict[str, Any]
_generate_basic_script(self, analysis_config) -> str
Purpose: Generate basic fallback script
Parameters:
analysis_config: Type: AnalysisConfiguration
Returns: str
_generate_basic_interpretation(self, results) -> str
Purpose: Generate basic interpretation when LLM fails
Parameters:
results: Type: Dict[str, Any]
Returns: str
_build_requirements_prompt(self, analysis_config, data_summary, user_query) -> str
Purpose: Build prompt for requirements.txt generation
Parameters:
analysis_config: Type: AnalysisConfiguration
data_summary: Type: Dict[str, Any]
user_query: Type: str
Returns: str
_parse_requirements_response(self, response) -> Tuple[str, str]
Purpose: Parse LLM response for requirements generation
Parameters:
response: Type: str
Returns: Tuple[str, str]
_get_default_requirements(self) -> str
Purpose: Get default requirements for statistical analysis
Returns: str
Required Imports
import os
import json
import logging
import re
from typing import Any, Dict, List, Tuple
Optional provider SDKs used when installed and configured: openai (OpenAI and Azure OpenAI clients), google.generativeai as genai (Gemini), and anthropic (imported lazily inside _call_llm for Claude models).
Usage Example
# Example usage:
# agent = StatisticalAgent(config)   # config: a Config instance holding the LLM API keys
# interpretation = agent.interpret_user_query(user_query, data_summary, available_columns)
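A fuller, hedged end-to-end sketch of the intended flow (the CSV name, the data-summary construction, and the execution step are illustrative assumptions; only the agent methods documented above come from the source):
```python
# Illustrative pipeline sketch. The agent methods are real (see Class Interface);
# the data-summary construction and the execution step are assumptions, since
# script execution is handled outside this class.
import pandas as pd

df = pd.read_csv("measurements.csv")              # hypothetical input file
data_summary = {                                  # assumed summary shape, mirroring what the prompts read
    "shape": df.shape,
    "column_info": {c: str(t) for c, t in df.dtypes.items()},
}

agent = StatisticalAgent(config)                  # config assumed to be already constructed

interpreted = agent.interpret_user_query(
    "Is there a difference in yield between line A and line B?",
    data_summary, list(df.columns), model="gpt-4o",
)
if interpreted["success"]:
    cfg = interpreted["suggested_config"]
    generated = agent.generate_analysis_script(cfg, data_summary, "Compare yield by line")
    if generated["success"]:
        script = generated["script"]              # executed elsewhere with df/results pre-loaded
```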
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
- class StatisticalAgent_v1 (98.9% similar)
- class StatisticalAgent (98.3% similar)
- function demo_statistical_agent (60.7% similar)
- class LLMClient_v2 (58.4% similar)
- class StatisticalAnalysisService_v1 (55.9% similar)