# solorpower_crawler/scripts_archive/fill_today_feb.py

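"""
Backfill crawl for the time slots that were missing on February 27.

The date itself is not hard-coded: the script re-crawls the hourly and daily
history for whatever "today" is at run time, for the plants listed in ``main``.
"""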

import sys
import os
import importlib
from datetime import datetime
from dotenv import load_dotenv

# Load variables from a .env file into the environment before the
# project-local modules further down are imported.
load_dotenv()

# On Windows, switch stdout/stderr to UTF-8 so the Korean text and emoji
# printed by this script do not fail to encode on the console.
if sys.platform.startswith('win'):
    sys.stdout.reconfigure(encoding='utf-8')
    sys.stderr.reconfigure(encoding='utf-8')

# Append this script's own directory to the import path; the `config` and
# `database` imports that follow are resolved through sys.path.
# NOTE(review): if those modules live in the parent directory rather than next
# to this script, this entry alone will not find them - confirm.
current_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(current_dir)

from config import get_all_plants
from database import save_history, get_supabase_client

def get_plant_config(target_id):
    """Look up a plant configuration by its id.

    Scans the entries returned by ``get_all_plants()`` in order and returns
    the first one whose ``'id'`` value equals *target_id*, or ``None`` when
    no entry matches.
    """
    candidates = (
        plant for plant in get_all_plants() if plant.get('id') == target_id
    )
    return next(candidates, None)

def _kst_hour_label(timestamp, kst):
    """Return the ``YYYY-MM-DDTHH`` hour bucket of *timestamp* in the *kst* zone.

    Timezone-aware ISO strings are converted to *kst* first, so the label
    lines up with the KST day window used in the query. Values that cannot be
    parsed, or that carry no UTC offset, fall back to the raw 13-character
    prefix.
    """
    try:
        parsed = datetime.fromisoformat(timestamp)
    except (TypeError, ValueError):
        return str(timestamp)[:13]
    if parsed.tzinfo is None:
        # No offset to convert from; keep the value as stored.
        return timestamp[:13]
    return parsed.astimezone(kst).strftime("%Y-%m-%dT%H")


def _report_existing_hours(plant_id, day_start, day_end, kst):
    """Print the hour buckets already stored for *plant_id* in the given window.

    Queries ``solar_logs`` for rows with ``day_start <= created_at < day_end``.
    The report is informational only, so any failure is printed and swallowed
    instead of aborting the crawl that follows.
    """
    try:
        client = get_supabase_client()
        if not client:
            return

        result = (
            client.table("solar_logs")
            .select("created_at")
            .eq("plant_id", plant_id)
            .gte("created_at", day_start)
            .lt("created_at", day_end)
            .execute()
        )

        existing_hours = {
            _kst_hour_label(rec['created_at'], kst)
            for rec in (result.data or [])
        }

        print(f"   현재 DB에 있는 시간대: {len(existing_hours)}개")
        print(f"   {sorted(existing_hours)[:5]}... (샘플)")
    except Exception as e:
        print(f"⚠️ [DB] 기존 시간대 조회 실패: {e}")


def _collect_and_save(crawler_module, plant_config, today, *, tag, fetch_name,
                      history_kind, start_message, noun):
    """Fetch one kind of history for *today* and store it via ``save_history``.

    *fetch_name* is the crawler function to call with
    ``(plant_config, today, today)``; *history_kind* is passed through to
    ``save_history``. *tag*, *start_message* and *noun* only shape the console
    output. Crawlers without *fetch_name* are reported and skipped. Any
    exception is printed with its traceback and not re-raised, so one failing
    step does not stop the remaining work.
    """
    import traceback

    try:
        print(start_message)

        if hasattr(crawler_module, fetch_name):
            fetch = getattr(crawler_module, fetch_name)
            records = fetch(plant_config, today, today)
            if records:
                print(f"   ✅ {len(records)}개 {noun} 데이터 수집 완료")
                save_history(records, history_kind)
                print(f"   ✅ DB 저장 완료")
            else:
                print("   ⚠️ 데이터 없음")
        else:
            print(f"   ⚠️ {plant_config['type']}는 {noun} 이력 수집을 지원하지 않음")

    except Exception as e:
        print(f"❌ [{tag}] 에러: {e}")
        traceback.print_exc()


def fill_today(plant_config):
    """Re-crawl today's hourly and daily history for one plant and save it.

    Args:
        plant_config: Plant configuration mapping. This function reads the
            ``'id'``, ``'type'`` and ``'name'`` keys; the whole mapping is
            handed to the crawler's fetch functions.

    Returns:
        None. Progress and errors are reported on stdout. If the crawler
        module ``crawlers.<type>`` cannot be imported the function returns
        early without touching the database.
    """
    from datetime import timedelta, timezone

    plant_id = plant_config['id']
    plant_type = plant_config['type']
    plant_name = plant_config['name']

    print(f"\n{'='*60}")
    print(f"🚀 [{plant_name}] 오늘 데이터 보완 ({plant_id})")
    print(f"{'='*60}")

    # Import the crawler module dynamically, based on the plant type.
    try:
        crawler_module = importlib.import_module(f"crawlers.{plant_type}")
    except ImportError:
        print(f"❌ 크롤러 모듈을 찾을 수 없습니다: crawlers/{plant_type}.py")
        return

    # The DB window below is expressed with a +09:00 offset, so "today" must
    # be the KST calendar date regardless of the host's local timezone.
    kst = timezone(timedelta(hours=9))
    now_kst = datetime.now(kst)
    today = now_kst.strftime("%Y-%m-%d")
    tomorrow = (now_kst + timedelta(days=1)).strftime("%Y-%m-%d")

    # 1. Show which hour buckets are already in the DB. The range is half-open,
    #    [today 00:00, tomorrow 00:00), so the last second of the day is kept.
    _report_existing_hours(
        plant_id,
        f"{today}T00:00:00+09:00",
        f"{tomorrow}T00:00:00+09:00",
        kst,
    )

    # 2. Crawl the hourly data.
    _collect_and_save(
        crawler_module, plant_config, today,
        tag="Hourly",
        fetch_name='fetch_history_hourly',
        history_kind='hourly',
        start_message=f"\n⏳ [Hourly] 오늘 시간별 데이터 수집 중...",
        noun="시간별",
    )

    # 3. Refresh the daily data as well.
    _collect_and_save(
        crawler_module, plant_config, today,
        tag="Daily",
        fetch_name='fetch_history_daily',
        history_kind='daily',
        start_message=f"\n⏳ [Daily] 오늘 일별 데이터 업데이트 중...",
        noun="일별",
    )

    print(f"\n✅ [{plant_name}] 작업 완료\n")

def main():
    """Run the backfill for every hard-coded target plant.

    For each plant id, looks up its configuration and passes it to
    ``fill_today``; ids with no configuration are reported and skipped.
    A banner is printed before and after the run.
    """
    banner = "=" * 60
    target_plants = ['kremc-05', 'nrems-09']

    print("\n" + banner)
    print("🌞 오늘 데이터 보완 크롤링")
    print(banner)

    for plant_id in target_plants:
        cfg = get_plant_config(plant_id)
        if not cfg:
            print(f"❌ 설정을 찾을 수 없습니다: {plant_id}")
            continue
        fill_today(cfg)

    print("\n" + banner)
    print("🎉 모든 작업 완료!")
    print(banner + "\n")

# Run the backfill only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()