import sys
import os
import importlib
from datetime import datetime, timezone, timedelta

# Make the project root importable when this script is run directly.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from dotenv import load_dotenv

load_dotenv(os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), '.env'))

from database import get_supabase_client, save_history
from config import get_all_plants


def cleanup_history_today(plant_id, today_str):
    """
    Delete existing 'History' status records for `today_str` so a re-fetch
    does not create duplicates.

    Only rows with status='History' are removed: real-time crawled logs use
    other statuses ('Normal', 'Abnormal', or empty) and must be preserved.
    `save_history` writes the data timestamp into created_at for hourly
    history rows, so the target rows fall inside
    [today 00:00:00, tomorrow 00:00:00).

    Args:
        plant_id: Plant identifier used in the solar_logs table.
        today_str: Target date as 'YYYY-MM-DD'.
    """
    client = get_supabase_client()
    if not client:
        return

    start_ts = f"{today_str}T00:00:00"
    # Exclusive upper bound at the next midnight instead of lte 23:59:59,
    # so records stamped within the final second of the day are not missed.
    next_day = (datetime.strptime(today_str, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")
    end_ts = f"{next_day}T00:00:00"

    try:
        res = client.table('solar_logs').delete() \
            .eq('plant_id', plant_id) \
            .eq('status', 'History') \
            .gte('created_at', start_ts) \
            .lt('created_at', end_ts) \
            .execute()
        if res.data:
            print(f"  🧹 Cleaned up {len(res.data)} old history records for {today_str}.")
    except Exception as e:
        # Best-effort cleanup: a failure here only risks duplicate rows,
        # so log and continue rather than abort the whole run.
        print(f"  ⚠️ Cleanup failed: {e}")


def fill_all_today():
    """
    Manually fetch today's hourly and daily data for every configured plant.

    For each plant (skipping type 'unknown'), the matching crawler module
    `crawlers.<type>` is imported dynamically and, when available:
      1. fetch_history_hourly -> rows saved via save_history(..., 'hourly')
         (after cleaning up today's previous 'History' rows)
      2. fetch_history_daily  -> rows saved via save_history(..., 'daily')
         (optional; the source site may not list today yet)
    Errors are reported per plant and do not stop the overall run.
    """
    plants = get_all_plants()
    now_kst = datetime.now(timezone(timedelta(hours=9)))  # KST (UTC+9)
    today_str = now_kst.strftime("%Y-%m-%d")

    print(f"🚀 Starting Manual Data Fetch for TODAY: {today_str}")
    print("=" * 60)

    for plant in plants:
        plant_id = plant['id']
        plant_name = plant['name']
        plant_type = plant['type']

        # Skip unknown or unsupported types
        if plant_type == 'unknown':
            continue

        print(f"\nProcessing [{plant_type.upper()}] {plant_name} ({plant_id})...")

        try:
            # Each plant type maps to a crawler module: crawlers.<type>
            module = importlib.import_module(f"crawlers.{plant_type}")

            # 1. Hourly Data
            if hasattr(module, 'fetch_history_hourly'):
                print("  ⏳ Fetching Hourly Data...")
                # Remove today's previous 'History' rows to prevent duplicates
                cleanup_history_today(plant_id, today_str)
                try:
                    # fetch_history_hourly(config, start_date, end_date)
                    data = module.fetch_history_hourly(plant, today_str, today_str)
                    if data:
                        # save_history handles 'hourly' -> inserts into solar_logs
                        save_history(data, 'hourly')
                    else:
                        print("  ⚠️ No Hourly data found.")
                except Exception as e:
                    print(f"  ❌ Hourly Fetch Error: {e}")
            else:
                print("  ℹ️ No fetch_history_hourly method.")

            # 2. Daily Data (optional, as it might not be ready yet)
            if hasattr(module, 'fetch_history_daily'):
                print("  ⏳ Fetching Daily Data...")
                try:
                    # fetch_history_daily(config, start_date, end_date)
                    data = module.fetch_history_daily(plant, today_str, today_str)
                    if data:
                        # save_history handles 'daily' -> upserts daily_stats & updates monthly
                        save_history(data, 'daily')
                    else:
                        print("  ⚠️ No Daily data found (Site might not list today yet).")
                except Exception as e:
                    print(f"  ❌ Daily Fetch Error: {e}")
            else:
                print("  ℹ️ No fetch_history_daily method.")

        except ImportError:
            print(f"  ❌ Module 'crawlers.{plant_type}' not found.")
        except Exception as e:
            print(f"  ❌ Error processing plant: {e}")

    print("\n" + "=" * 60)
    print("All tasks completed.")


if __name__ == "__main__":
    fill_all_today()