import sys
from pathlib import Path

# Add parent directory to sys.path to allow importing from root
sys.path.append(str(Path(__file__).parent.parent))

import re
from datetime import datetime, timedelta

from dotenv import load_dotenv

# Load environment variables
load_dotenv()

from database import get_supabase_client, save_history

PLANT_MAP = {
    "태양과바람 1호기": "nrems-01",
    "태양과바람 2호기": "nrems-02",
    "태양과바람 3호기": "nrems-03",
    "태양과바람 4호기": "nrems-04",
    "태양과바람 5호기": "kremc-05",
    "태양과바람 6호기": "sunwms-06",
    "태양과바람 8호기": "hyundai-08",
    "태양과바람 9호기": "nrems-09",
    "태양과바람 10호기": "cmsolar-10"
}


def clean_and_recover(log_path, start_time_str, end_time_str):
    """
    1. Removes bad data (where current_kw == generation_kwh but current_kw
       should be 0). Or, more simply: removes ALL hourly data for the period
       and re-inserts it.
    2. Parses the log and re-inserts the data.
    """
    print(f"🧹 Cleaning DB data from {start_time_str} to {end_time_str}...")

    # save_history() sends timezone-aware timestamps (+09:00) and Supabase
    # stores them as UTC, so the deletion range must be converted from local
    # time (KST) to UTC before querying.

    client = get_supabase_client()
    if not client:
        return

    # Parse the window up front; the same datetimes are reused below to
    # filter log entries, so bail out early if the format is wrong.
    try:
        start_dt = datetime.strptime(start_time_str, "%Y-%m-%d %H:%M:%S")
        end_dt = datetime.strptime(end_time_str, "%Y-%m-%d %H:%M:%S")
    except ValueError as e:
        print(f"❌ Invalid time format: {e}")
        return

    # KST to UTC: subtract 9 hours.
    # e.g. 2026-02-12 17:00:00 KST -> 08:00 UTC
    #      2026-02-13 10:00:00 KST -> 01:00 UTC (next day)
    start_utc = (start_dt - timedelta(hours=9)).isoformat()
    end_utc = (end_dt - timedelta(hours=9)).isoformat()

    # 1. Delete existing records in the range
    try:
        print(f"   Deleting range (UTC): {start_utc} ~ {end_utc}")
        res = client.table("solar_logs").delete() \
            .gte("created_at", start_utc) \
            .lte("created_at", end_utc) \
            .execute()
        print(f"✅ Deleted {len(res.data) if res.data else 0} records.")
    except Exception as e:
        print(f"❌ Deletion failed: {e}")
        # Proceed to insert anyway; duplicates may occur if the delete failed.

    print(f"📂 Parsing log: {log_path}")

    start_pattern = re.compile(r"통합 관제 시스템.*\((\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2})\)")
    table_pattern = re.compile(r"(태양과바람 \d+호기)\s+\|\s+([\d.]+)\s+\|\s+([\d.]+)\s+\|")

    current_timestamp = None
    recovered_data = []

    try:
        with open(log_path, 'r', encoding='utf-8') as f:
            for line in f:
                # A header line marks the start of a new crawl snapshot;
                # only keep snapshots that fall inside the recovery window.
                start_match = start_pattern.search(line)
                if start_match:
                    ts_str = start_match.group(1)
                    ts_dt = datetime.strptime(ts_str, "%Y-%m-%d %H:%M:%S")
                    if start_dt <= ts_dt <= end_dt:
                        current_timestamp = ts_str
                    else:
                        current_timestamp = None
                    continue

                if current_timestamp:
                    table_match = table_pattern.search(line)
                    if table_match:
                        plant_name = table_match.group(1).strip()
                        kw = float(table_match.group(2))
                        kwh = float(table_match.group(3))

                        plant_id = PLANT_MAP.get(plant_name)
                        if plant_id:
                            recovered_data.append({
                                'plant_id': plant_id,
                                'timestamp': current_timestamp,
                                'current_kw': kw,  # database.py now handles 0.0 correctly
                                'generation_kwh': kwh
                            })
    except Exception as e:
        print(f"❌ Error parsing log: {e}")
        return

    print(f"✅ Found {len(recovered_data)} points to restore.")
    if not recovered_data:
        return

    # Insert in chunks to keep request payloads small.
    chunk_size = 100
    total_saved = 0
    for i in range(0, len(recovered_data), chunk_size):
        chunk = recovered_data[i:i + chunk_size]
        if save_history(chunk, 'hourly'):
            total_saved += len(chunk)
        else:
            print("❌ Insert failed")

    print(f"🎉 Recovery finished. {total_saved} records inserted.")

    # 2. Daily stats update (optional, but safe to do)
    # ... (omitted for brevity; hourly is the critical data)
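
# A rough sketch only of the daily-stats step left out above, assuming
# save_history() also accepts a 'daily' granularity and that it takes rows of
# the shape {plant_id, date, generation_kwh} -- both are assumptions, so
# check database.py before wiring this in. It is not called anywhere.
def recompute_daily_stats(recovered_data):
    """Hypothetical helper: aggregate recovered hourly rows into per-plant
    daily totals and hand them to save_history()."""
    from collections import defaultdict

    # Assumes generation_kwh is a cumulative daily counter, so the day's
    # total is the largest reading seen for each (plant, date) pair.
    daily_max = defaultdict(float)
    for row in recovered_data:
        day = row['timestamp'][:10]  # "YYYY-MM-DD" prefix of "YYYY-MM-DD HH:MM:SS"
        key = (row['plant_id'], day)
        daily_max[key] = max(daily_max[key], row['generation_kwh'])

    daily_rows = [
        {'plant_id': plant_id, 'date': day, 'generation_kwh': kwh}
        for (plant_id, day), kwh in daily_max.items()
    ]
    return save_history(daily_rows, 'daily')
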

if __name__ == "__main__":
    log_file = r"d:\dev\etc\SolorPower\crawler\log\cron.log"

    # Target period: yesterday 17:00 ~ today 10:00
    start = "2026-02-12 17:00:00"
    end = "2026-02-13 10:00:00"

    clean_and_recover(log_file, start, end)
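
# Footnote on the timezone math in clean_and_recover(): the fixed 9-hour
# subtraction is safe because KST has no daylight saving time. A timezone-aware
# sketch of the same conversion (kept in a comment so the script's behavior is
# unchanged; zoneinfo needs Python 3.9+, and kst_to_utc_iso is hypothetical):
#
#     from zoneinfo import ZoneInfo
#
#     def kst_to_utc_iso(ts_str):
#         """Parse a naive 'YYYY-mm-dd HH:MM:SS' string as KST, return UTC ISO."""
#         dt = datetime.strptime(ts_str, "%Y-%m-%d %H:%M:%S")
#         return dt.replace(tzinfo=ZoneInfo("Asia/Seoul")) \
#                  .astimezone(ZoneInfo("UTC")).isoformat()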