# solorpower_crawler/tools/recover_from_log.py

import sys
from pathlib import Path

# Add the parent directory to sys.path so modules in the project root
# (e.g. database.py) can be imported.
sys.path.append(str(Path(__file__).parent.parent))

import os
import re
from datetime import datetime, timedelta

from dotenv import load_dotenv

# Load environment variables (Supabase credentials, etc.) before touching the DB.
load_dotenv()

from database import get_supabase_client, save_history

PLANT_MAP = {
    "태양과바람 1호기": "nrems-01",
    "태양과바람 2호기": "nrems-02",
    "태양과바람 3호기": "nrems-03",
    "태양과바람 4호기": "nrems-04",
    "태양과바람 5호기": "kremc-05",
    "태양과바람 6호기": "sunwms-06",
    "태양과바람 8호기": "hyundai-08",
    "태양과바람 9호기": "nrems-09",
    "태양과바람 10호기": "cmsolar-10",
}


def clean_and_recover(log_path, start_time_str, end_time_str):
    """
    Recover hourly generation data for a time window from the crawler log.

    1. Deletes ALL hourly records in the target period (simpler than trying to
       pick out only the bad rows where current_kw mirrored generation_kwh but
       should have been 0).
    2. Parses the log and re-inserts the recovered data points.
    """
print(f"🧹 Cleaning DB data from {start_time_str} to {end_time_str}...")
# Convert local times to UTC range for deletion query
# But wait, save_history sends timezone-aware timestamp (+09:00).
# Supabase stores as UTC.
# To delete, we can use the same string range if we are careful, or convert.
# The safest way is to target the range.
# 1. Delete existing records in the range
client = get_supabase_client()
if not client:
return
# KST to UTC conversion for query
# 2026-02-12 17:00:00 KST -> 08:00 UTC
# 2026-02-13 10:00:00 KST -> 01:00 UTC (next day)
try:
start_dt = datetime.strptime(start_time_str, "%Y-%m-%d %H:%M:%S")
end_dt = datetime.strptime(end_time_str, "%Y-%m-%d %H:%M:%S")
# UTC subtract 9 hours
from datetime import timedelta
start_utc = (start_dt - timedelta(hours=9)).isoformat()
end_utc = (end_dt - timedelta(hours=9)).isoformat()
print(f" Deleting range (UTC): {start_utc} ~ {end_utc}")
# Delete solar_logs
res = client.table("solar_logs").delete() \
.gte("created_at", start_utc) \
.lte("created_at", end_utc) \
.execute()
print(f"✅ Deleted {len(res.data) if res.data else '0'} records.")
except Exception as e:
print(f"❌ Deletion failed: {e}")
# Proceed to insert anyway? Duplicates might occur if delete failed.
print(f"📂 Parsing log: {log_path}")
start_pattern = re.compile(r"통합 관제 시스템.*\((\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2})\)")
table_pattern = re.compile(r"(태양과바람 \d+호기)\s+\|\s+([\d.]+)\s+\|\s+([\d.]+)\s+\|")
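    # Illustrative lines the patterns above are assumed to match (the real log
    # layout comes from the crawler and is not shown in this script):
    #   ===== 통합 관제 시스템 (2026-02-12 17:00:03) =====
    #   태양과바람 1호기  |  12.3  |  456.7  | ...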

    current_timestamp = None
    recovered_data = []

    try:
        with open(log_path, 'r', encoding='utf-8') as f:
            for line in f:
                # A system header line starts a new snapshot; keep its timestamp
                # only if it falls inside the recovery window.
                start_match = start_pattern.search(line)
                if start_match:
                    ts_str = start_match.group(1)
                    ts_dt = datetime.strptime(ts_str, "%Y-%m-%d %H:%M:%S")
                    current_timestamp = ts_str if start_dt <= ts_dt <= end_dt else None
                    continue

                if current_timestamp:
                    table_match = table_pattern.search(line)
                    if table_match:
                        plant_name = table_match.group(1).strip()
                        kw = float(table_match.group(2))
                        kwh = float(table_match.group(3))
                        plant_id = PLANT_MAP.get(plant_name)
                        if plant_id:
                            recovered_data.append({
                                'plant_id': plant_id,
                                'timestamp': current_timestamp,
                                'current_kw': kw,  # database.py now handles 0.0 correctly
                                'generation_kwh': kwh,
                            })
    except Exception as e:
        print(f"❌ Error parsing log: {e}")
        return
print(f"✅ Found {len(recovered_data)} points to restore.")
if not recovered_data:
return
chunk_size = 100
total_saved = 0
for i in range(0, len(recovered_data), chunk_size):
chunk = recovered_data[i:i + chunk_size]
if save_history(chunk, 'hourly'):
total_saved += len(chunk)
else:
print("❌ Insert failed")
print(f"🎉 Recovery finished. {total_saved} records inserted.")

    # 3. Daily stats update is intentionally skipped: the hourly rows are the
    #    critical data, and daily aggregates can be rebuilt separately.
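    # A possible follow-up, left as a commented-out sketch. Everything here is
    # an assumption: it presumes generation_kwh is a cumulative daily counter
    # and that a daily stats table exists; verify both before using it.
    #
    #   from collections import defaultdict
    #   daily = defaultdict(float)
    #   for row in recovered_data:
    #       day = row['timestamp'][:10]
    #       key = (row['plant_id'], day)
    #       daily[key] = max(daily[key], row['generation_kwh'])
    #   # ...then upsert each (plant_id, day) total into the daily stats table.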


if __name__ == "__main__":
    log_file = r"d:\dev\etc\SolorPower\crawler\log\cron.log"

    # Target period: yesterday 17:00 ~ today 10:00 (KST), hard-coded for this
    # one-off recovery run. Edit the path and period before reusing.
    start = "2026-02-12 17:00:00"
    end = "2026-02-13 10:00:00"

    clean_and_recover(log_file, start, end)
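
# Example invocation (assumes database.py lives one directory above this
# script, which is what the sys.path tweak at the top relies on):
#   cd solorpower_crawler && python tools/recover_from_log.py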