discourse/analysis/app/data_fetcher.py

119 lines
5.6 KiB
Python
Raw Normal View History

2025-03-25 03:52:30 -04:00
"""
Data Fetcher module for VoxPop AI Analysis Service.
Fetches perspective data from IPFS (mocked for POC).
"""
import logging
import random
from typing import Dict, List, Optional
logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Mock corpus used in place of real IPFS content.
# ---------------------------------------------------------------------------

# General perspectives, grouped by theme.
MOCK_PERSPECTIVES = [
    # Theme: environment and public spaces
    "We need more funding for our public parks. They're essential for community wellbeing.",
    "The city should invest in renewable energy projects to reduce our carbon footprint.",
    "I'm concerned about air pollution in the downtown area, especially near schools.",
    "Public transportation needs improvement. Buses are often late and overcrowded.",
    "We should ban single-use plastics in all city-owned facilities.",
    "The river cleanup project has been a great success and should be expanded.",
    "More bike lanes would reduce traffic congestion and improve public health.",
    # Theme: community services
    "The library needs longer hours, particularly on weekends for working families.",
    "Our community centers need more diverse programming for seniors.",
    "Healthcare access in rural areas remains a serious problem that needs attention.",
    "The homeless shelter is underfunded and can't meet the growing demand.",
    "Our schools need more funding for arts and music education, not just STEM.",
    "Police response times in my neighborhood are too slow compared to wealthier areas.",
    # Theme: economic development
    "Small businesses are struggling with high rent costs in the downtown area.",
    "The tax incentives for large corporations should be redirected to local businesses.",
    "We need affordable housing options for middle-income families, not just luxury condos.",
    "The minimum wage increase has helped many families in our community.",
    "Tourism has been declining, and we need a strategy to attract more visitors.",
    "The new development project will create jobs but might price out current residents.",
]

# Strongly critical statements, included to vary the sentiment of the corpus.
NEGATIVE_PERSPECTIVES = [
    "The city council has completely ignored infrastructure needs in my neighborhood.",
    "Public transportation in this city is a disgrace compared to other major cities.",
    "The recycling program is ineffective and poorly managed.",
    "Our tax dollars are being wasted on unnecessary projects while basic needs go unfunded.",
    "The noise pollution from the airport expansion is ruining our quality of life.",
    "Property taxes are too high for the poor quality of services we receive.",
    "The city's response to homeless encampments has been inhumane and ineffective.",
]

# Strongly favourable statements, included to vary the sentiment of the corpus.
POSITIVE_PERSPECTIVES = [
    "The new community garden initiative has transformed our neighborhood for the better.",
    "The city's free summer concert series brings people together and builds community spirit.",
    "The recent road repairs have significantly improved my daily commute.",
    "Our local schools have shown remarkable improvement thanks to the new funding program.",
    "The city's small business grants helped me launch my company during difficult times.",
    "The public art installations have made our downtown more vibrant and welcoming.",
    "The new affordable housing development has allowed many families to remain in the area.",
]

# Full pool: general entries first, then negative, then positive.
ALL_PERSPECTIVES = [
    *MOCK_PERSPECTIVES,
    *NEGATIVE_PERSPECTIVES,
    *POSITIVE_PERSPECTIVES,
]
class PerspectiveFetcher:
    """
    Fetches perspective data from IPFS (currently mocked).

    No IPFS access happens here: every record is built from the module-level
    ``ALL_PERSPECTIVES`` pool, with a randomly generated issue id and user
    address and a fixed placeholder timestamp.
    """

    def __init__(self):
        """Initialize the perspective fetcher."""
        logger.info("Initializing PerspectiveFetcher")

    @staticmethod
    def _index_for_hash(hash_val: str) -> int:
        """Map a hash string to a stable index into ``ALL_PERSPECTIVES``."""
        try:
            # Original scheme: read the last two characters as a hex number.
            seed = int(hash_val[-2:], 16)
        except ValueError:
            # IPFS CIDs are base58/base32 encoded, so the suffix is frequently
            # not valid hex (and an empty string never is). Fall back to a
            # code-point sum, which is deterministic across runs, unlike the
            # per-process salted built-in hash().
            seed = sum(ord(ch) for ch in hash_val)
        return seed % len(ALL_PERSPECTIVES)

    @staticmethod
    def _mock_record(text: str, hash_val: str) -> Dict[str, str]:
        """Build one perspective record with randomized mock metadata."""
        return {
            "text": text,
            "hash": hash_val,
            "issueId": f"issue-{random.randint(1000, 9999)}",
            # Fixed placeholder timestamp; the mock has no real submission time.
            "timestamp": "2023-06-01T12:34:56Z",
            "userAddress": f"0x{random.randint(1000, 9999):x}",
        }

    def fetch_perspectives(self, hashes: Optional[List[str]] = None) -> List[Dict[str, str]]:
        """
        Fetch perspectives from IPFS using their hashes.

        Args:
            hashes: List of IPFS hashes. If None or empty, a random sample of
                15 to 25 mock perspectives is returned instead.

        Returns:
            List of perspective dictionaries with the keys ``text``, ``hash``,
            ``issueId``, ``timestamp`` and ``userAddress``. When hashes are
            given, one record is returned per hash, in the same order, and
            the text chosen for a given hash is always the same.
        """
        logger.info("Fetching perspectives. Hashes provided: %s", bool(hashes))

        # In a real implementation, this would fetch data from IPFS
        # (e.g. via ipfshttpclient). For now, return mock data.
        if hashes:
            # Each hash deterministically selects one mock perspective.
            perspectives = [
                self._mock_record(
                    ALL_PERSPECTIVES[self._index_for_hash(hash_val)], hash_val
                )
                for hash_val in hashes
            ]
        else:
            # No hashes requested: return a random selection of perspectives
            # labelled with sequential placeholder hashes.
            sample_size = min(random.randint(15, 25), len(ALL_PERSPECTIVES))
            selected_texts = random.sample(ALL_PERSPECTIVES, sample_size)
            perspectives = [
                self._mock_record(text, f"QmHash{i:03d}")
                for i, text in enumerate(selected_texts)
            ]

        logger.info("Fetched %d perspectives", len(perspectives))
        return perspectives