import sys
from pathlib import Path

# Add parent directory to sys.path to allow importing from root
sys.path.append(str(Path(__file__).parent.parent))

import re
from datetime import datetime, timedelta

from dotenv import load_dotenv

# Load environment variables
load_dotenv()

from database import get_supabase_client, save_history

PLANT_MAP = {
    "태양과바람 1호기": "nrems-01",
    "태양과바람 2호기": "nrems-02",
    "태양과바람 3호기": "nrems-03",
    "태양과바람 4호기": "nrems-04",
    "태양과바람 5호기": "kremc-05",
    "태양과바람 6호기": "sunwms-06",
    "태양과바람 8호기": "hyundai-08",
    "태양과바람 9호기": "nrems-09",
    "태양과바람 10호기": "cmsolar-10"
}


def clean_and_recover(log_path, start_time_str, end_time_str):
    """
    1. Removes bad data (where current_kw == generation_kwh but current_kw
       should be 0). Or, more simply: removes ALL hourly data for the period
       and re-inserts it.
    2. Parses the log and re-inserts the data.
    """
    print(f"🧹 Cleaning DB data from {start_time_str} to {end_time_str}...")

    # save_history() sends timezone-aware timestamps (+09:00) and Supabase
    # stores them as UTC, so the deletion range must be converted from local
    # time (KST) to UTC before querying.

    client = get_supabase_client()
    if not client:
        return

    # Parse the window up front; the same datetimes are reused below to
    # filter log entries, so bail out early if the format is wrong.
    try:
        start_dt = datetime.strptime(start_time_str, "%Y-%m-%d %H:%M:%S")
        end_dt = datetime.strptime(end_time_str, "%Y-%m-%d %H:%M:%S")
    except ValueError as e:
        print(f"❌ Invalid time format: {e}")
        return

    # KST to UTC: subtract 9 hours.
    # e.g. 2026-02-12 17:00:00 KST -> 08:00 UTC
    #      2026-02-13 10:00:00 KST -> 01:00 UTC (next day)
    start_utc = (start_dt - timedelta(hours=9)).isoformat()
    end_utc = (end_dt - timedelta(hours=9)).isoformat()

    # 1. Delete existing records in the range
    try:
        print(f"   Deleting range (UTC): {start_utc} ~ {end_utc}")
        res = client.table("solar_logs").delete() \
            .gte("created_at", start_utc) \
            .lte("created_at", end_utc) \
            .execute()
        print(f"✅ Deleted {len(res.data) if res.data else 0} records.")
    except Exception as e:
        print(f"❌ Deletion failed: {e}")
        # Proceed to insert anyway; duplicates may occur if the delete failed.

    print(f"📂 Parsing log: {log_path}")

    start_pattern = re.compile(r"통합 관제 시스템.*\((\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2})\)")
    table_pattern = re.compile(r"(태양과바람 \d+호기)\s+\|\s+([\d.]+)\s+\|\s+([\d.]+)\s+\|")

    current_timestamp = None
    recovered_data = []

    try:
        with open(log_path, 'r', encoding='utf-8') as f:
            for line in f:
                # A header line marks the start of a new crawl snapshot;
                # only keep snapshots that fall inside the recovery window.
                start_match = start_pattern.search(line)
                if start_match:
                    ts_str = start_match.group(1)
                    ts_dt = datetime.strptime(ts_str, "%Y-%m-%d %H:%M:%S")
                    if start_dt <= ts_dt <= end_dt:
                        current_timestamp = ts_str
                    else:
                        current_timestamp = None
                    continue

                if current_timestamp:
                    table_match = table_pattern.search(line)
                    if table_match:
                        plant_name = table_match.group(1).strip()
                        kw = float(table_match.group(2))
                        kwh = float(table_match.group(3))

                        plant_id = PLANT_MAP.get(plant_name)
                        if plant_id:
                            recovered_data.append({
                                'plant_id': plant_id,
                                'timestamp': current_timestamp,
                                'current_kw': kw,  # database.py now handles 0.0 correctly
                                'generation_kwh': kwh
                            })
    except Exception as e:
        print(f"❌ Error parsing log: {e}")
        return

    print(f"✅ Found {len(recovered_data)} points to restore.")
    if not recovered_data:
        return

    # Insert in chunks to keep request payloads small.
    chunk_size = 100
    total_saved = 0
    for i in range(0, len(recovered_data), chunk_size):
        chunk = recovered_data[i:i + chunk_size]
        if save_history(chunk, 'hourly'):
            total_saved += len(chunk)
        else:
            print("❌ Insert failed")

    print(f"🎉 Recovery finished. {total_saved} records inserted.")

    # 2. Daily stats update (optional, but safe to do)
    # ... (omitted for brevity; hourly is the critical data)
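
# A rough sketch only of the daily-stats step left out above, assuming
# save_history() also accepts a 'daily' granularity and that it takes rows of
# the shape {plant_id, date, generation_kwh} -- both are assumptions, so
# check database.py before wiring this in. It is not called anywhere.
def recompute_daily_stats(recovered_data):
    """Hypothetical helper: aggregate recovered hourly rows into per-plant
    daily totals and hand them to save_history()."""
    from collections import defaultdict

    # Assumes generation_kwh is a cumulative daily counter, so the day's
    # total is the largest reading seen for each (plant, date) pair.
    daily_max = defaultdict(float)
    for row in recovered_data:
        day = row['timestamp'][:10]  # "YYYY-MM-DD" prefix of "YYYY-MM-DD HH:MM:SS"
        key = (row['plant_id'], day)
        daily_max[key] = max(daily_max[key], row['generation_kwh'])

    daily_rows = [
        {'plant_id': plant_id, 'date': day, 'generation_kwh': kwh}
        for (plant_id, day), kwh in daily_max.items()
    ]
    return save_history(daily_rows, 'daily')
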

if __name__ == "__main__":
    log_file = r"d:\dev\etc\SolorPower\crawler\log\cron.log"

    # Target period: yesterday 17:00 ~ today 10:00
    start = "2026-02-12 17:00:00"
    end = "2026-02-13 10:00:00"

    clean_and_recover(log_file, start, end)
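
# Footnote on the timezone math in clean_and_recover(): the fixed 9-hour
# subtraction is safe because KST has no daylight saving time. A timezone-aware
# sketch of the same conversion (kept in a comment so the script's behavior is
# unchanged; zoneinfo needs Python 3.9+, and kst_to_utc_iso is hypothetical):
#
#     from zoneinfo import ZoneInfo
#
#     def kst_to_utc_iso(ts_str):
#         """Parse a naive 'YYYY-mm-dd HH:MM:SS' string as KST, return UTC ISO."""
#         dt = datetime.strptime(ts_str, "%Y-%m-%d %H:%M:%S")
#         return dt.replace(tzinfo=ZoneInfo("Asia/Seoul")) \
#                  .astimezone(ZoneInfo("UTC")).isoformat()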