"""
|
|
Insight Generator module for VoxPop.
|
|
Generates structured insights from analyzed perspective data.
|
|
"""
|
|
|
|
import logging
|
|
import json
|
|
from typing import Dict, List, Any
|
|
import time
|
|
import os
|
|
from datetime import datetime
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
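
# Expected shape of the `analysis_results` dict passed to generate_insights
# below (inferred from the fields the code reads; the analyzer's actual
# output may carry additional keys):
#
#     {
#         "insights": [
#             {"summary": "Residents want more bike lanes", "confidence": 0.82},
#             ...
#         ],
#         "perspectives": [...],  # raw perspectives, used only for the count
#     }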


class InsightGenerator:
    """
    Generates structured insights from analyzed perspective data.
    """

    def __init__(self, output_dir: str = "insights"):
        """
        Initialize the insight generator.

        Args:
            output_dir: Directory to store generated insights
        """
        self.output_dir = output_dir
        os.makedirs(output_dir, exist_ok=True)
        logger.info(f"Insight generator initialized with output directory: {output_dir}")

    def generate_insights(self, analysis_results: Dict[str, Any]) -> Dict[str, Any]:
        """
        Generate structured insights from analysis results.

        Args:
            analysis_results: Results from the analyzer

        Returns:
            Structured insights
        """
        logger.info("Generating insights from analysis results")

        # Extract the raw insights produced by the analyzer
        raw_insights = analysis_results.get("insights", [])

        # Format insights, dropping any below the minimum confidence threshold
        structured_insights = []

        for insight in raw_insights:
            summary = insight.get("summary", "")
            confidence = insight.get("confidence", 0.0)

            # Skip very low confidence insights (threshold: 0.05)
            if confidence < 0.05:
                continue

            structured_insights.append({
                "summary": summary,
                "confidence": confidence,
                "timestamp": datetime.now().isoformat()
            })

        # Save insights to a timestamped file; note that two calls within the
        # same second share a filename, so the later call overwrites the earlier
        timestamp = int(time.time())
        output_path = os.path.join(self.output_dir, f"insights_{timestamp}.json")

        insights_data = {
            "insights": structured_insights,
            "metadata": {
                "timestamp": timestamp,
                "perspective_count": len(analysis_results.get("perspectives", [])),
                "generated_at": datetime.now().isoformat()
            }
        }

        try:
            with open(output_path, "w", encoding="utf-8") as f:
                json.dump(insights_data, f, indent=2)
            logger.info(f"Insights saved to {output_path}")
        except Exception as e:
            logger.error(f"Error saving insights to file: {e}")

        return insights_data
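
    # The JSON file written by generate_insights has this shape
    # (values illustrative):
    #
    #     {
    #         "insights": [
    #             {"summary": "...", "confidence": 0.82,
    #              "timestamp": "2024-01-01T12:00:00"}
    #         ],
    #         "metadata": {
    #             "timestamp": 1704110400,
    #             "perspective_count": 3,
    #             "generated_at": "2024-01-01T12:00:00"
    #         }
    #     }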

    def get_recent_insights(self, count: int = 10) -> List[Dict[str, Any]]:
        """
        Get the most recent insights.

        Args:
            count: Number of recent insight files to retrieve

        Returns:
            List of insight dictionaries
        """
        insights = []

        try:
            # Collect all insight files in the output directory
            files = [os.path.join(self.output_dir, f) for f in os.listdir(self.output_dir)
                     if f.startswith("insights_") and f.endswith(".json")]

            # Sort by modification time (most recent first)
            files.sort(key=os.path.getmtime, reverse=True)

            # Load the most recent files
            for file_path in files[:count]:
                try:
                    with open(file_path, "r", encoding="utf-8") as f:
                        data = json.load(f)
                        insights.append(data)
                except Exception as e:
                    logger.error(f"Error loading insight file {file_path}: {e}")

        except Exception as e:
            logger.error(f"Error retrieving recent insights: {e}")

        return insights

    def get_consolidated_insights(self, days: int = 1) -> Dict[str, Any]:
        """
        Consolidate insights from a recent time period.

        Args:
            days: Number of days to look back

        Returns:
            Consolidated insights
        """
        # Gather all insights written within the look-back window
        all_insights = []
        cutoff_time = time.time() - (days * 24 * 60 * 60)

        try:
            files = [os.path.join(self.output_dir, f) for f in os.listdir(self.output_dir)
                     if f.startswith("insights_") and f.endswith(".json")]

            for file_path in files:
                # Only consider files modified within the time period
                if os.path.getmtime(file_path) >= cutoff_time:
                    try:
                        with open(file_path, "r", encoding="utf-8") as f:
                            data = json.load(f)
                            all_insights.extend(data.get("insights", []))
                    except Exception as e:
                        logger.error(f"Error loading insight file {file_path}: {e}")
        except Exception as e:
            logger.error(f"Error consolidating insights: {e}")

        # Group similar insights (simplified approach)
        consolidated = {}

        for insight in all_insights:
            summary = insight.get("summary", "")

            # Use the first three words as a grouping key, so e.g.
            # "Residents want more bike lanes" and "Residents want more parks"
            # both fall into the "Residents want more" bucket
            key_words = " ".join(summary.split()[:3])

            if key_words in consolidated:
                # Keep the highest confidence seen for this group
                consolidated[key_words]["confidence"] = max(
                    consolidated[key_words]["confidence"],
                    insight.get("confidence", 0.0)
                )
                # Track the most recent timestamp for this group
                # (ISO-8601 strings compare correctly lexicographically)
                consolidated[key_words]["last_seen"] = max(
                    consolidated[key_words]["last_seen"],
                    insight.get("timestamp", "")
                )
                consolidated[key_words]["count"] += 1
            else:
                consolidated[key_words] = {
                    "summary": summary,
                    "confidence": insight.get("confidence", 0.0),
                    "count": 1,
                    "last_seen": insight.get("timestamp", datetime.now().isoformat())
                }

        # Convert to a list sorted by confidence (highest first)
        result = list(consolidated.values())
        result.sort(key=lambda x: x["confidence"], reverse=True)

        return {
            "consolidated_insights": result,
            "metadata": {
                "period_days": days,
                "total_insights": len(all_insights),
                "consolidated_count": len(result),
                "generated_at": datetime.now().isoformat()
            }
        }
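

# ---------------------------------------------------------------------------
# Illustrative usage, not part of the VoxPop pipeline. The sample
# `analysis_results` below is hypothetical, shaped to match the fields that
# generate_insights reads; real analyzer output may differ.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    generator = InsightGenerator(output_dir="insights")

    sample_results = {
        "insights": [
            {"summary": "Residents want more bike lanes", "confidence": 0.82},
            {"summary": "Residents want more parks", "confidence": 0.67},
            {"summary": "Unclear signal about parking", "confidence": 0.02},  # below threshold
        ],
        "perspectives": ["p1", "p2", "p3"],
    }

    # Generate and persist insights; the 0.02-confidence entry is filtered out
    generated = generator.generate_insights(sample_results)
    print(f"Kept {len(generated['insights'])} of {len(sample_results['insights'])} insights")

    # Read back the most recent insight files, including the one just written
    recent = generator.get_recent_insights(count=5)
    print(f"Loaded {len(recent)} recent insight files")

    # Consolidate the last day's insights; the two "Residents want more ..."
    # summaries collapse into a single group
    consolidated = generator.get_consolidated_insights(days=1)
    print(f"{consolidated['metadata']['consolidated_count']} consolidated groups")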