""" Insight Generator module for VoxPop. Generates structured insights from analyzed perspective data. """ import logging import json from typing import Dict, List, Any import time import os from datetime import datetime logger = logging.getLogger(__name__) class InsightGenerator: """ Generates structured insights from analyzed perspective data. """ def __init__(self, output_dir: str = "insights"): """ Initialize the insight generator. Args: output_dir: Directory to store generated insights """ self.output_dir = output_dir os.makedirs(output_dir, exist_ok=True) logger.info(f"Insight generator initialized with output directory: {output_dir}") def generate_insights(self, analysis_results: Dict[str, Any]) -> Dict[str, Any]: """ Generate structured insights from analysis results. Args: analysis_results: Results from the analyzer Returns: Structured insights """ logger.info("Generating insights from analysis results") # Extract insights from analysis results raw_insights = analysis_results.get("insights", []) # Format insights and filter out low confidence ones structured_insights = [] for insight in raw_insights: summary = insight.get("summary", "") confidence = insight.get("confidence", 0.0) # Filter out low confidence insights if confidence < 0.05: continue structured_insights.append({ "summary": summary, "confidence": confidence, "timestamp": datetime.now().isoformat() }) # Save insights to file timestamp = int(time.time()) output_path = os.path.join(self.output_dir, f"insights_{timestamp}.json") insights_data = { "insights": structured_insights, "metadata": { "timestamp": timestamp, "perspective_count": len(analysis_results.get("perspectives", [])), "generated_at": datetime.now().isoformat() } } try: with open(output_path, 'w') as f: json.dump(insights_data, f, indent=2) logger.info(f"Insights saved to {output_path}") except Exception as e: logger.error(f"Error saving insights to file: {e}") return insights_data def get_recent_insights(self, count: int = 10) -> List[Dict[str, Any]]: """ Get the most recent insights. Args: count: Number of recent insight files to retrieve Returns: List of insight dictionaries """ insights = [] try: # Get all insight files files = [os.path.join(self.output_dir, f) for f in os.listdir(self.output_dir) if f.startswith("insights_") and f.endswith(".json")] # Sort by modification time (most recent first) files.sort(key=lambda x: os.path.getmtime(x), reverse=True) # Load the most recent files for file_path in files[:count]: try: with open(file_path, 'r') as f: data = json.load(f) insights.append(data) except Exception as e: logger.error(f"Error loading insight file {file_path}: {e}") except Exception as e: logger.error(f"Error retrieving recent insights: {e}") return insights def get_consolidated_insights(self, days: int = 1) -> Dict[str, Any]: """ Consolidate insights from a recent time period. Args: days: Number of days to look back Returns: Consolidated insights """ # Get all insight files all_insights = [] cutoff_time = time.time() - (days * 24 * 60 * 60) try: files = [os.path.join(self.output_dir, f) for f in os.listdir(self.output_dir) if f.startswith("insights_") and f.endswith(".json")] for file_path in files: # Check if file is within the time period if os.path.getmtime(file_path) >= cutoff_time: try: with open(file_path, 'r') as f: data = json.load(f) all_insights.extend(data.get("insights", [])) except Exception as e: logger.error(f"Error loading insight file {file_path}: {e}") except Exception as e: logger.error(f"Error consolidating insights: {e}") # Group similar insights (simplified approach) consolidated = {} for insight in all_insights: summary = insight.get("summary", "") # Use first few words as a key to group similar insights key_words = " ".join(summary.split()[:3]) if key_words in consolidated: # Update confidence with max value consolidated[key_words]["confidence"] = max( consolidated[key_words]["confidence"], insight.get("confidence", 0.0) ) consolidated[key_words]["count"] += 1 else: consolidated[key_words] = { "summary": summary, "confidence": insight.get("confidence", 0.0), "count": 1, "last_seen": insight.get("timestamp", datetime.now().isoformat()) } # Convert to list and sort by confidence result = list(consolidated.values()) result.sort(key=lambda x: x["confidence"], reverse=True) return { "consolidated_insights": result, "metadata": { "period_days": days, "total_insights": len(all_insights), "consolidated_count": len(result), "generated_at": datetime.now().isoformat() } }