# solorpower_crawler/fetch_history.py


import sys
import os
import importlib
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
from dotenv import load_dotenv

# Load environment variables from a .env file.
load_dotenv()

# Work around Windows console encoding problems by forcing UTF-8 on
# stdout/stderr (the messages printed below contain non-ASCII text).
if sys.platform.startswith('win'):
    sys.stdout.reconfigure(encoding='utf-8')
    sys.stderr.reconfigure(encoding='utf-8')

# Add this file's directory to the module search path; presumably this is
# what lets the project-local imports that follow resolve when the script is
# run from another working directory.
current_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(current_dir)

from config import get_all_plants
from database import save_history

def get_plant_config(target_id):
    """Look up the configuration dict for a plant ID.

    A plant whose ``id`` equals ``target_id`` is returned as-is. In addition,
    the virtual IDs ``'nrems-01'`` and ``'nrems-02'`` resolve to the first
    plant whose options have ``is_split`` set; for those, the result carries
    the requested ID and ``options['split_index']`` set to 1 or 2.

    The split result is a copy: the dicts returned by ``get_all_plants()``
    are never modified, so a lookup cannot change what a later lookup sees
    (e.g. asking for ``'nrems-02'`` after ``'nrems-01'``, or for the split
    plant's original ID).

    Args:
        target_id: Plant ID to search for.

    Returns:
        The matching plant configuration dict, or ``None`` if nothing matches.
    """
    # Virtual split IDs and the split_index each one selects.
    split_indexes = {'nrems-01': 1, 'nrems-02': 2}

    for plant in get_all_plants():
        # Regular match on the configured ID.
        if plant.get('id') == target_id:
            return plant

        # NREMS split-household match (nrems-01, nrems-02).
        # `or {}` also tolerates an explicit `options: None` entry.
        options = plant.get('options') or {}
        if options.get('is_split') and target_id in split_indexes:
            # Build a copy with the overrides instead of writing into the
            # shared config entry.
            return {
                **plant,
                'id': target_id,
                'options': {
                    **options,
                    'split_index': split_indexes[target_id],
                },
            }
    return None

def _collect_history(crawler_module, plant_config, label, period_name,
                     fetch_attr, granularity, start, end):
    """Fetch one granularity of history from a crawler module and save it.

    Any exception raised while fetching or saving is reported on stdout and
    swallowed, so a failure in one granularity does not stop the others.

    Args:
        crawler_module: The imported ``crawlers.<type>`` module.
        plant_config: Plant configuration dict, passed through to the crawler.
        label: Tag used in progress/error output (e.g. ``'Hourly'``).
        period_name: Period word used in the "not supported" message.
        fetch_attr: Name of the crawler function to call if the module has it.
        granularity: Second argument forwarded to ``save_history``.
        start: Start of the range handed to the crawler (date string).
        end: End of the range handed to the crawler (date string).
    """
    try:
        print(f"\n⏳ [{label}] 수집 : {start} ~ {end}")

        fetch = getattr(crawler_module, fetch_attr, None)
        if fetch is None:
            plant_type = plant_config['type']
            print(f"   {plant_type}는 {period_name} 이력 수집을 지원하지 않음")
            return

        records = fetch(plant_config, start, end)
        if records:
            save_history(records, granularity)
        else:
            print("   데이터 없음")
    except Exception as e:
        # Deliberate best-effort boundary: report and carry on.
        print(f"❌ [{label}] 에러: {e}")


def fetch_and_save(plant_config):
    """Backfill historical generation data for one plant.

    Dynamically imports ``crawlers.<plant type>`` and then collects:

    1. Hourly data from the first day of the current month through today.
    2. Daily data from the plant's ``start_date`` (default ``'2020-01-01'``)
       through today.

    Monthly data is intentionally not collected; per the original author's
    note, the API aggregates month/year statistics from the daily data, which
    is why the daily range covers the plant's whole operating period.

    Args:
        plant_config: Plant configuration dict. Must contain ``'id'``,
            ``'type'`` and ``'name'``; ``'start_date'`` is optional.

    Returns:
        None. Progress and errors are printed to stdout. If the crawler
        module cannot be imported the function reports that and returns
        without collecting anything.
    """
    plant_id = plant_config['id']
    plant_type = plant_config['type']
    plant_name = plant_config['name']
    start_date_str = plant_config.get('start_date', '2020-01-01')

    print(f"🚀 [{plant_name}] 과거 데이터 수집 시작 ({plant_id})")
    print(f"   타입: {plant_type}, 가동개시일: {start_date_str}")

    # Dynamically import the crawler module for this plant type.
    try:
        crawler_module = importlib.import_module(f"crawlers.{plant_type}")
    except ImportError:
        print(f"❌ 크롤러 모듈을 찾을 수 없습니다: crawlers/{plant_type}.py")
        return

    now = datetime.now()
    today_str = now.strftime("%Y-%m-%d")
    month_start_str = now.replace(day=1).strftime("%Y-%m-%d")

    # 1. Hourly: first day of this month .. today. The crawler works
    #    start -> end, so it is called once with the whole range.
    _collect_history(
        crawler_module, plant_config, 'Hourly', '시간별',
        'fetch_history_hourly', 'hourly', month_start_str, today_str,
    )

    # 2. Daily: plant start date .. today (full period, see docstring).
    _collect_history(
        crawler_module, plant_config, 'Daily', '일별',
        'fetch_history_daily', 'daily', start_date_str, today_str,
    )

    print(f"\n✅ [{plant_name}] 모든 작업 완료")

if __name__ == "__main__":
    # Script entry point: expects the plant ID as the first CLI argument.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python fetch_history.py <plant_id>")
        sys.exit(1)

    target_plant_id = cli_args[0]
    cfg = get_plant_config(target_plant_id)

    # Report an unknown plant ID; otherwise run the backfill.
    if not cfg:
        print(f"❌ 설정을 찾을 수 없습니다: {target_plant_id}")
    else:
        fetch_and_save(cfg)