commit 20ef587800456c9e156bb19cd3c23518825cca55 Author: haneulai Date: Fri Jan 30 11:43:08 2026 +0900 Initial commit with fixed daily summary logic diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a4c8446 --- /dev/null +++ b/.gitignore @@ -0,0 +1,49 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +.pytest_cache/ +.coverage +htmlcov/ +.tox/ +.nox/ +.venv +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Environment Variables +.env +.env.local + +# IDE +.vscode/ +.idea/ + +# Custom +*.log +*.sqlite3 +crawler_manager.db +temp_env/ +tests/db_dump.csv +tests/results.csv +tests/*_log.txt diff --git a/config.py b/config.py new file mode 100644 index 0000000..9b8f5af --- /dev/null +++ b/config.py @@ -0,0 +1,208 @@ +# ========================================== +# config.py - 다중 업체(Multi-Tenant) 설정 관리 +# ========================================== + +# --------------------------------------------------------- +# [시스템 상수] 각 크롤러 시스템의 URL 및 엔드포인트 +# --------------------------------------------------------- +SYSTEM_CONSTANTS = { + 'nrems': { + 'api_url': 'http://www.nrems.co.kr/v2/local/proc/index_proc.php', + 'detail_url': 'http://www.nrems.co.kr/v2/local/comp/cp_inv.php', + 'inv_proc_url': 'http://www.nrems.co.kr/v2/local/proc/cp_inv_proc.php' + }, + 'kremc': { + 'login_url': 'https://kremc.kr/api/v2.2/login', + 'api_base': 'https://kremc.kr/api/v2.2', + 'enso_type': '15001' + }, + 'sun_wms': { + 'base_url': 'http://tb6.sun-wms.com', + 'login_url': 'http://tb6.sun-wms.com/public/main/login_chk.php', + 'data_url': 'http://tb6.sun-wms.com/public/main/realdata.php', + 'statics_url': 'http://tb6.sun-wms.com/public/statics/statics.php' + }, + 'hyundai': { + 'base_url': 'https://hs3.hyundai-es.co.kr', + 'login_path': '/hismart/login', + 'data_path': '/hismart/site/getSolraUnitedWork' + }, + 'cmsolar': { + 
'base_url': 'http://www.cmsolar2.kr', + 'api_url': 'http://www.cmsolar2.kr', + 'login_url': 'http://www.cmsolar2.kr/login_ok.php', + 'data_url': 'http://www.cmsolar2.kr/plant/sub/report_ok.php' + } +} + +# --------------------------------------------------------- +# [업체 목록] 업체 > 발전소 계층 구조 +# --------------------------------------------------------- +COMPANIES = [ + { + 'company_id': 'sunwind', + 'company_name': '태양과바람', + 'plants': [ + # NREMS 계열 - 1, 2호기 (분리 처리) + # id는 크롤러 내부에서 'nrems-01', 'nrems-02'로 분리 할당 + { + 'name': '1호기, 2호기', + 'display_name': 'SPLIT_1_2', + 'type': 'nrems', + 'auth': { + 'pscode': 'duce2023072288' + }, + 'options': { + 'is_split': True + }, + 'start_date': '2014-03-31', + 'capacity_kw': 100.0 # 1호기 50kW + 2호기 50kW + # id는 크롤러에서 동적 할당 (nrems-01, nrems-02) + }, + # NREMS 계열 - 3호기 + { + 'id': 'nrems-03', + 'name': '3호기', + 'type': 'nrems', + 'auth': { + 'pscode': 'dc2023121086' + }, + 'options': { + 'is_split': False + }, + 'start_date': '2015-12-22', + 'capacity_kw': 99.82 + }, + # NREMS 계열 - 4호기 + { + 'id': 'nrems-04', + 'name': '4호기', + 'type': 'nrems', + 'auth': { + 'pscode': 'dc2023121085' + }, + 'options': { + 'is_split': False + }, + 'start_date': '2017-01-11', + 'capacity_kw': 88.2 + }, + # NREMS 계열 - 9호기 + { + 'id': 'nrems-09', + 'name': '9호기', + 'type': 'nrems', + 'auth': { + 'pscode': 'a2020061008' + }, + 'options': { + 'is_split': False + }, + 'start_date': '2020-10-28', + 'capacity_kw': 99.12 + }, + # KREMC - 5호기 + { + 'id': 'kremc-05', + 'name': '5호기', + 'type': 'kremc', + 'auth': { + 'user_id': '서대문도서관', + 'password': 'sunhope5!' 
+ }, + 'options': { + 'cid': '10013000376', + 'cityProvCode': '11', + 'rgnCode': '11410', + 'dongCode': '1141011700' + }, + 'start_date': '2018-06-28', + 'capacity_kw': 42.7 + }, + # Sun-WMS - 6호기 + { + 'id': 'sunwms-06', + 'name': '6호기', + 'type': 'sun_wms', + 'auth': { + 'payload_id': 'kc0fXUW0LUm2wZa+2NQI0Q==', + 'payload_pw': 'PGXjU6ib2mKYwtrh2i3fIQ==' + }, + 'options': {}, + 'start_date': '2019-12-30', + 'capacity_kw': 49.9 + }, + # 현대 - 8호기 + { + 'id': 'hyundai-08', + 'name': '8호기', + 'type': 'hyundai', + 'auth': { + 'user_id': 'epecoop', + 'password': 'sunhope0419', + 'site_id': 'M0494' + }, + 'options': {}, + 'start_date': '2020-02-06', + 'capacity_kw': 99.9 + }, + # CMSolar - 10호기 + { + 'id': 'cmsolar-10', + 'name': '10호기', + 'type': 'cmsolar', + 'auth': { + 'login_id': 'sy7144', + 'login_pw': 'sy7144', + 'site_no': '834' + }, + 'options': {}, + 'start_date': '2020-08-31', + 'capacity_kw': 31.5 + } + ] + } +] + +# --------------------------------------------------------- +# [헬퍼 함수] 평탄화된 발전소 리스트 반환 +# --------------------------------------------------------- +def get_all_plants(): + """ + 모든 업체의 발전소 정보를 평탄화하여 반환 + """ + all_plants = [] + + for company in COMPANIES: + company_id = company.get('company_id', '') + company_name = company.get('company_name', '') + + for plant in company.get('plants', []): + plant_type = plant.get('type', '') + system_config = SYSTEM_CONSTANTS.get(plant_type, {}) + + plant_info = { + 'company_id': company_id, + 'company_name': company_name, + 'id': plant.get('id', ''), # DB용 고유 ID + 'name': plant.get('name', ''), + 'display_name': plant.get('display_name', plant.get('name', '')), + 'type': plant_type, + 'auth': plant.get('auth', {}), + 'options': plant.get('options', {}), + 'start_date': plant.get('start_date', ''), + 'capacity_kw': plant.get('capacity_kw', 0.0), + 'system': system_config + } + + all_plants.append(plant_info) + + return all_plants + +def get_plants_by_company(company_id): + """특정 업체의 발전소만 반환""" + return [p for p 
in get_all_plants() if p['company_id'] == company_id] + +def get_plants_by_type(plant_type): + """특정 타입의 발전소만 반환""" + return [p for p in get_all_plants() if p['type'] == plant_type] diff --git a/crawler_gui.py b/crawler_gui.py new file mode 100644 index 0000000..09c7b9e --- /dev/null +++ b/crawler_gui.py @@ -0,0 +1,404 @@ +import tkinter as tk +from tkinter import ttk, messagebox, scrolledtext +import threading +import subprocess +import sys +import os +import json +import sqlite3 +from datetime import datetime +import time + +# 프로젝트 루트 경로 추가 +current_dir = os.path.dirname(os.path.abspath(__file__)) +project_root = os.path.dirname(current_dir) +sys.path.append(project_root) + +# 모듈 import 시도 (실패 시 예외처리) +try: + from config import get_all_plants + from crawler_manager import CrawlerManager +except ImportError: + # GUI 단독 실행 시 더미 데이터 사용 가능하도록 + pass + +class CrawlerControlPanel: + def __init__(self, root): + self.root = root + self.root.title("☀️ 태양광 발전 통합 관제 시스템 [관리자 모드]") + self.root.geometry("1100x750") + self.root.configure(bg="#f0f2f5") + + # 스타일 설정 + self.setup_styles() + + # 데이터 매니저 초기화 + try: + self.manager = CrawlerManager(os.path.join(project_root, "crawler_manager.db")) + self.plants = get_all_plants() + except: + self.manager = None + self.plants = [] + + # 메인 레이아웃 + self.create_layout() + + # 초기 데이터 로드 + self.refresh_monitor() + + def setup_styles(self): + style = ttk.Style() + style.theme_use('clam') + + # 프리미엄 색상 팔레트 + colors = { + 'primary': '#2563eb', + 'secondary': '#64748b', + 'success': '#16a34a', + 'danger': '#dc2626', + 'bg': '#f8fafc', + 'card': '#ffffff' + } + + style.configure("Header.TLabel", font=("Malgun Gothic", 16, "bold"), background="#f0f2f5", foreground="#1e293b") + style.configure("Section.TLabel", font=("Malgun Gothic", 12, "bold"), background="#f0f2f5", foreground="#334155") + + style.configure("Card.TFrame", background="#ffffff", relief="flat") + + # 트리뷰 스타일 (표) + style.configure("Treeview", + background="#ffffff", + 
fieldbackground="#ffffff", + font=("Malgun Gothic", 10), + rowheight=30 + ) + style.configure("Treeview.Heading", + font=("Malgun Gothic", 10, "bold"), + background="#e2e8f0", + foreground="#1e293b" + ) + + # 버튼 스타일 + style.configure("Action.TButton", font=("Malgun Gothic", 10), padding=6) + style.map("Action.TButton", background=[("active", "#dbeafe")]) + + def create_layout(self): + # 상단 헤더 + header_frame = ttk.Frame(self.root, padding="20 20 20 10") + header_frame.pack(fill="x") + + ttk.Label(header_frame, text="⚡ SolorPower Crawler Control", style="Header.TLabel").pack(side="left") + + status_frame = ttk.Frame(header_frame) + status_frame.pack(side="right") + self.status_label = ttk.Label(status_frame, text="🟢 시스템 대기중", font=("Malgun Gothic", 10), foreground="green") + self.status_label.pack() + + # 메인 컨텐츠 (좌우 분할) + main_paned = ttk.PanedWindow(self.root, orient="horizontal") + main_paned.pack(fill="both", expand=True, padx=20, pady=10) + + # 좌측 패널: 발전소 목록 및 제어 + left_frame = ttk.Frame(main_paned) + main_paned.add(left_frame, weight=2) + + # 우측 패널: 로그 및 상세 정보 + right_frame = ttk.Frame(main_paned) + main_paned.add(right_frame, weight=1) + + # --- 좌측 패널 구성 --- + # 1. 제어 버튼 그룹 + control_frame = ttk.LabelFrame(left_frame, text="통합 제어", padding=15) + control_frame.pack(fill="x", pady=(0, 15)) + + btn_grid = ttk.Frame(control_frame) + btn_grid.pack(fill="x") + + ttk.Button(btn_grid, text="▶ 전체 수집 시작", command=self.run_all_crawlers, style="Action.TButton").pack(side="left", padx=5) + ttk.Button(btn_grid, text="🔄 새로고침", command=self.refresh_monitor, style="Action.TButton").pack(side="left", padx=5) + ttk.Button(btn_grid, text="📊 통계 요약 실행", command=self.run_daily_summary, style="Action.TButton").pack(side="left", padx=5) + + # 2. 
발전소 모니터링 테이블 + table_frame = ttk.LabelFrame(left_frame, text="발전소 모니터링 현황", padding=10) + table_frame.pack(fill="both", expand=True) + + columns = ("site_id", "name", "type", "status", "schedule", "last_run", "action", "history") + self.tree = ttk.Treeview(table_frame, columns=columns, show="tree headings", selectmode="browse") + + self.tree.heading("site_id", text="ID") + self.tree.heading("name", text="발전소명") + self.tree.heading("type", text="타입") + self.tree.heading("status", text="상태") + self.tree.heading("schedule", text="스케줄") + self.tree.heading("last_run", text="최근 실행") + self.tree.heading("action", text="개별 제어") + self.tree.heading("history", text="과거 데이터") + + self.tree.column("site_id", width=80) + self.tree.column("name", width=150) + self.tree.column("type", width=80) + self.tree.column("status", width=80) + self.tree.column("schedule", width=100) + self.tree.column("last_run", width=140) + self.tree.column("action", width=80) + self.tree.column("history", width=80) + + scrollbar = ttk.Scrollbar(table_frame, orient="vertical", command=self.tree.yview) + self.tree.configure(yscroll=scrollbar.set) + + self.tree.pack(side="left", fill="both", expand=True) + scrollbar.pack(side="right", fill="y") + + # 우클릭 메뉴 (복구) + self.context_menu = tk.Menu(self.root, tearoff=0) + self.context_menu.add_command(label="▶ 이 사이트만 즉시 실행", command=self.run_selected_crawler) + self.context_menu.add_command(label="📑 상세 로그 보기", command=self.show_site_logs) + self.context_menu.add_separator() + self.context_menu.add_command(label="🔄 학습 모드로 리셋", command=self.reset_learning_mode) + + # 이벤트 바인딩 + self.tree.bind("<Button-1>", self.on_tree_click) + self.tree.bind("<Button-3>", self.show_context_menu) + self.tree.bind("<Double-1>", lambda e: self.run_selected_crawler()) + + # --- 우측 패널 구성 --- + # 실시간 로그 뷰어 + log_frame = ttk.LabelFrame(right_frame, text="실시간 시스템 로그", padding=10) + log_frame.pack(fill="both", expand=True) + + self.log_text = scrolledtext.ScrolledText(log_frame, state='disabled', font=("Consolas", 9), 
bg="#1e293b", fg="#e2e8f0") + self.log_text.pack(fill="both", expand=True) + + # 태그 설정 (로그 색상) + self.log_text.tag_config("INFO", foreground="#60a5fa") + self.log_text.tag_config("SUCCESS", foreground="#4ade80") + self.log_text.tag_config("ERROR", foreground="#f87171") + self.log_text.tag_config("WARNING", foreground="#fbbf24") + + def log(self, message, level="INFO"): + """로그 창에 메시지 출력""" + timestamp = datetime.now().strftime("%H:%M:%S") + full_msg = f"[{timestamp}] {message}\n" + + self.log_text.configure(state='normal') + self.log_text.insert("end", full_msg, level) + self.log_text.see("end") + self.log_text.configure(state='disabled') + + def refresh_monitor(self): + """테이블 데이터 새로고침""" + # 기존 항목 제거 + for i in self.tree.get_children(): + self.tree.delete(i) + + if not self.manager: + self.log("DB 매니저 로드 실패", "ERROR") + return + + # DB에서 최신 상태 조회 + site_stats = {s['site_id']: s for s in self.manager.get_all_sites()} + + # 중복 회사 노드 방지용 + added_companies = set() + + for plant in self.plants: + # 1,2호기 분리 로직 반영 + is_split = plant.get('options', {}).get('is_split', False) + company_name = plant.get('company_name', '') + plant_name = plant.get('name', '') + + sub_units = [] + if is_split: + sub_units.append({'id': 'nrems-01', 'name': f'{company_name} 1호기', 'type': plant['type']}) + sub_units.append({'id': 'nrems-02', 'name': f'{company_name} 2호기', 'type': plant['type']}) + else: + plant_id = plant.get('id', '') + if plant_id: + sub_units.append({'id': plant_id, 'name': f'{company_name} {plant_name}', 'type': plant['type']}) + + for unit in sub_units: + site_id = unit['id'] + stat = site_stats.get(site_id, {}) + + status_text = stat.get('status', 'UNREGISTERED') + schedule_text = f"매시 {stat.get('target_minute', -1)}분" if stat.get('target_minute', -1) >= 0 else "학습중" + last_run = stat.get('last_run', '-') or '-' + if last_run != '-': + try: + last_run = last_run.split('.')[0].replace('T', ' ') # 포맷팅 + except: pass + + # 태그 설정 (색상) + row_tag = "normal" + if status_text 
== 'OPTIMIZED': row_tag = "optimized" + + # 회사 노드 확인 및 생성 + company_id = plant.get('company_id', 'unknown') + if company_id not in added_companies: + self.tree.insert("", "end", iid=company_id, text=company_name, values=( + "", company_name, "GROUP", "", "", "", "", "" + ), open=True) + added_companies.add(company_id) + + # 발전소 노드 추가 (회사 노드 하위) + self.tree.insert(company_id, "end", iid=site_id, values=( + site_id, + unit['name'], + unit['type'].upper(), + status_text, + schedule_text, + last_run, + "▶ 실행", + "📥 수집" + ), tags=(row_tag,)) + + self.tree.tag_configure("optimized", foreground="#059669") # 진한 녹색 + self.log("모니터링 상태 갱신 완료 (계층형)", "INFO") + + def on_tree_click(self, event): + """트리뷰 클릭 이벤트 처리""" + try: + region = self.tree.identify_region(event.x, event.y) + if region != "cell": return + + col = self.tree.identify_column(event.x) + item_id = self.tree.identify_row(event.y) + + if not item_id: return + + # 컬럼 인덱스 확인 (columns 배열 기준 1-based, #1=site_id, ... #7=action, #8=history) + # Treeview columns: ("site_id", "name", "type", "status", "schedule", "last_run", "action", "history") + # Display columns include transparent tree column if show="tree headings" + # identify_column returns '#N'. 
+ # #1: site_id, #7: action, #8: history + + if col == '#7': # Action (실행) + self.log(f"'{item_id}' 실행 요청", "INFO") + # TODO: 개별 실행 + self.run_process_thread(["main.py", "--site", item_id], f"{item_id} 수집") + + elif col == '#8': # History (과거 데이터) + # 그룹 노드는 제외 + if self.tree.parent(item_id) == "": + return + if messagebox.askyesno("과거 데이터 수집", f"'{item_id}'의 과거 내역을 수집하시겠습니까?\n(시간별/일별/월별 전체)"): + self.run_process_thread(["fetch_history.py", item_id], f"{item_id} 히스토리 수집") + + except Exception as e: + self.log(f"클릭 처리 중 오류: {e}", "ERROR") + + def show_context_menu(self, event): + item = self.tree.identify_row(event.y) + if item: + self.tree.selection_set(item) + self.context_menu.post(event.x_root, event.y_root) + + def run_process_thread(self, cmd_list, description): + """백그라운드 스레드에서 서브프로세스 실행""" + def task(): + self.status_label.config(text=f"⏳ {description} 중...", foreground="orange") + self.log(f"{description} 시작...", "INFO") + + try: + # python 실행 경로 확보 + python_exe = sys.executable + + # 가상환경 venv/temp_env 사용 시 경로 조정 + venv_python = os.path.join(project_root, "venv", "Scripts", "python.exe") + temp_env_python = os.path.join(current_dir, "temp_env", "Scripts", "python.exe") + + if os.path.exists(temp_env_python): + python_exe = temp_env_python + elif os.path.exists(venv_python): + python_exe = venv_python + + full_cmd = [python_exe] + cmd_list + + # 서브프로세스 실행 + process = subprocess.Popen( + full_cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + cwd=current_dir, + text=True, + encoding='utf-8', + errors='replace' # 인코딩 에러 방지 + ) + + stdout, stderr = process.communicate() + + if stdout: + for line in stdout.splitlines(): + if "Error" in line or "fail" in line.lower(): + self.log(line, "ERROR") + else: + self.log(line, "INFO") + + if stderr: + self.log(f"STDERR: {stderr}", "WARNING") + + if process.returncode == 0: + self.log(f"{description} 완료 ✅", "SUCCESS") + else: + self.log(f"{description} 실패 (Exit Code: {process.returncode})", "ERROR") + + except 
Exception as e: + self.log(f"실행 오류: {e}", "ERROR") + + finally: + self.root.after(0, self.refresh_monitor) + self.root.after(0, lambda: self.status_label.config(text="🟢 시스템 대기중", foreground="green")) + + thread = threading.Thread(target=task) + thread.daemon = True + thread.start() + + def run_all_crawlers(self): + """전체 통합 크롤링 실행 (강제 모드)""" + if messagebox.askyesno("확인", "모든 발전소 데이터를 강제로 수집하시겠습니까?"): + self.run_process_thread(["main.py", "--force"], "전체 데이터 수집") + + def run_selected_crawler(self): + """선택된 단일 사이트 크롤링 (현재 main.py는 단일 실행 옵션이 없어서 전체를 돌리되, 추후 개선 필요)""" + # 임시로 단일 실행 기능이 없으므로 알림만 띄움 (추후 main.py에 --site 옵션 추가 필요) + selected = self.tree.selection() + if not selected: + return + + site_id = selected[0] + # main.py 수정 없이 특정 사이트만 돌리기 어려우므로, 안내 메시지 + # 실제로는 main.py에 인자 처리를 추가해야 함. + # 여기서는 전체 실행으로 대체하거나, 추후 main.py 업데이트 후 구현 + + # 임시 구현: main.py를 호출하되 필터링은 구현 안 되어있음. + # 이번 단계에서는 GUI 틀을 만드는 것이므로 전체 실행으로 트리거 + self.log(f"'{site_id}' 단일 실행 요청 (현재는 전체 실행으로 동작)", "WARNING") + self.run_process_thread(["main.py", "--force"], f"'{site_id}' 데이터 수집") + + def run_daily_summary(self): + """일일 통계 집계 실행""" + self.run_process_thread(["daily_summary.py"], "일일 통계 집계") + + def show_site_logs(self): + selected = self.tree.selection() + if selected: + site_id = selected[0] + self.log(f"'{site_id}' 로그 조회 기능은 아직 구현되지 않았습니다.", "INFO") + + def reset_learning_mode(self): + selected = self.tree.selection() + if selected: + site_id = selected[0] + if self.manager.reset_to_learning(site_id): + self.log(f"'{site_id}' 학습 모드로 리셋 완료", "SUCCESS") + self.refresh_monitor() + +if __name__ == "__main__": + root = tk.Tk() + + # 아이콘 설정 (옵션) + # try: root.iconbitmap("icon.ico") + # except: pass + + app = CrawlerControlPanel(root) + root.mainloop() diff --git a/crawler_manager.py b/crawler_manager.py new file mode 100644 index 0000000..3b1a3fd --- /dev/null +++ b/crawler_manager.py @@ -0,0 +1,369 @@ +# ========================================== +# crawler_manager.py - 크롤링 스케줄 최적화 미들웨어 +# 
========================================== +# NAS 리소스 절약을 위해 SQLite 기반으로 각 사이트의 +# 업데이트 패턴을 학습하고 최적 시점에만 크롤링 실행 + +import sqlite3 +from datetime import datetime, timedelta +from pathlib import Path + + +class CrawlerManager: + """ + 크롤링 스케줄을 자동으로 최적화하는 매니저 클래스 + + - LEARNING 상태: 모든 크롤링 허용 (패턴 학습 중) + - OPTIMIZED 상태: 학습된 업데이트 시점 전후에만 크롤링 허용 + """ + + def __init__(self, db_path: str = None): + """ + DB 연결 및 테이블 초기화 + + Args: + db_path: SQLite DB 파일 경로. 기본값은 스크립트와 같은 디렉토리의 crawler_manager.db + """ + if db_path is None: + db_path = Path(__file__).parent / "crawler_manager.db" + + self.db_path = str(db_path) + self._init_db() + + def _init_db(self): + """테이블이 없으면 생성""" + with sqlite3.connect(self.db_path) as conn: + cursor = conn.cursor() + cursor.execute(""" + CREATE TABLE IF NOT EXISTS site_rules ( + site_id TEXT PRIMARY KEY, + status TEXT DEFAULT 'LEARNING', + target_minute INTEGER DEFAULT -1, + start_date TEXT, + last_run TEXT + ) + """) + conn.commit() + + def _get_connection(self) -> sqlite3.Connection: + """SQLite 연결 반환""" + return sqlite3.connect(self.db_path) + + def register_site(self, site_id: str) -> bool: + """ + 새로운 사이트 등록 + + Args: + site_id: 사이트 식별자 (예: 'nrems-01') + + Returns: + bool: 새로 등록되었으면 True, 이미 존재하면 False + """ + with self._get_connection() as conn: + cursor = conn.cursor() + + # 이미 존재하는지 확인 + cursor.execute("SELECT 1 FROM site_rules WHERE site_id = ?", (site_id,)) + if cursor.fetchone(): + return False + + # 새로 등록 + today = datetime.now().strftime("%Y-%m-%d") + cursor.execute(""" + INSERT INTO site_rules (site_id, status, target_minute, start_date, last_run) + VALUES (?, 'LEARNING', -1, ?, NULL) + """, (site_id, today)) + conn.commit() + + print(f" 📝 [CrawlerManager] '{site_id}' 신규 등록 (LEARNING 모드)") + return True + + def should_run(self, site_id: str) -> bool: + """ + 현재 시점에 해당 사이트를 크롤링해야 하는지 판단 + + Args: + site_id: 사이트 식별자 + + Returns: + bool: 크롤링 실행 여부 + """ + now = datetime.now() + current_hour = now.hour + current_minute = now.minute + + 
# 야간 모드: 21시 ~ 05시에는 크롤링 중지 + if current_hour >= 21 or current_hour < 5: + return False + + with self._get_connection() as conn: + cursor = conn.cursor() + cursor.execute(""" + SELECT status, target_minute, last_run + FROM site_rules + WHERE site_id = ? + """, (site_id,)) + row = cursor.fetchone() + + # 등록되지 않은 사이트면 일단 등록 후 True 반환 + if not row: + self.register_site(site_id) + return True + + status, target_minute, last_run = row + + # LEARNING 상태: 항상 실행 허용 (패턴 학습 목적) + if status == "LEARNING": + return True + + # OPTIMIZED 상태: 최적화된 시간대에만 실행 + if status == "OPTIMIZED" and target_minute >= 0: + # target_minute 이후 10분 윈도우 내에서만 허용 + # 예: target_minute=15 → 15~24분 사이에만 실행 + window_start = target_minute + window_end = (target_minute + 10) % 60 + + # 윈도우가 시간 경계를 넘는 경우 (예: 55~04분) + if window_start <= window_end: + in_window = window_start <= current_minute < window_end + else: + in_window = current_minute >= window_start or current_minute < window_end + + if not in_window: + return False + + # 중복 실행 방지: 최근 1시간 내 실행 이력이 있으면 스킵 + if last_run: + try: + last_run_dt = datetime.fromisoformat(last_run) + if now - last_run_dt < timedelta(hours=1): + return False + except (ValueError, TypeError): + pass + + return True + + # 기타 상태는 기본적으로 허용 + return True + + def update_optimization(self, site_id: str, detected_minute: int) -> bool: + """ + 사이트의 업데이트 패턴이 감지되면 OPTIMIZED 상태로 전환 + + Args: + site_id: 사이트 식별자 + detected_minute: 업데이트가 감지된 분 (0~59) + + Returns: + bool: 업데이트 성공 여부 + """ + if not 0 <= detected_minute <= 59: + print(f" ⚠️ [CrawlerManager] 유효하지 않은 minute 값: {detected_minute}") + return False + + with self._get_connection() as conn: + cursor = conn.cursor() + cursor.execute(""" + UPDATE site_rules + SET status = 'OPTIMIZED', target_minute = ? + WHERE site_id = ? 
+ """, (detected_minute, site_id)) + conn.commit() + + if cursor.rowcount > 0: + print(f" ✅ [CrawlerManager] '{site_id}' → OPTIMIZED (매시 {detected_minute}분)") + return True + else: + print(f" ⚠️ [CrawlerManager] '{site_id}' 사이트를 찾을 수 없음") + return False + + def record_run(self, site_id: str): + """ + 크롤링 성공 시 마지막 실행 시간 기록 + + Args: + site_id: 사이트 식별자 + """ + now_str = datetime.now().isoformat() + + with self._get_connection() as conn: + cursor = conn.cursor() + cursor.execute(""" + UPDATE site_rules + SET last_run = ? + WHERE site_id = ? + """, (now_str, site_id)) + conn.commit() + + def get_site_info(self, site_id: str) -> dict: + """ + 사이트 정보 조회 (디버깅/모니터링용) + + Args: + site_id: 사이트 식별자 + + Returns: + dict: 사이트 정보 또는 None + """ + with self._get_connection() as conn: + cursor = conn.cursor() + cursor.execute(""" + SELECT site_id, status, target_minute, start_date, last_run + FROM site_rules + WHERE site_id = ? + """, (site_id,)) + row = cursor.fetchone() + + if row: + return { + "site_id": row[0], + "status": row[1], + "target_minute": row[2], + "start_date": row[3], + "last_run": row[4] + } + return None + + def get_all_sites(self) -> list: + """ + 모든 사이트 정보 조회 + + Returns: + list: 모든 사이트 정보 리스트 + """ + with self._get_connection() as conn: + cursor = conn.cursor() + cursor.execute(""" + SELECT site_id, status, target_minute, start_date, last_run + FROM site_rules + ORDER BY site_id + """) + rows = cursor.fetchall() + + return [ + { + "site_id": row[0], + "status": row[1], + "target_minute": row[2], + "start_date": row[3], + "last_run": row[4] + } + for row in rows + ] + + def reset_to_learning(self, site_id: str) -> bool: + """ + 사이트를 다시 LEARNING 상태로 리셋 + + Args: + site_id: 사이트 식별자 + + Returns: + bool: 리셋 성공 여부 + """ + with self._get_connection() as conn: + cursor = conn.cursor() + cursor.execute(""" + UPDATE site_rules + SET status = 'LEARNING', target_minute = -1 + WHERE site_id = ? 
+ """, (site_id,)) + conn.commit() + + return cursor.rowcount > 0 + + +# ========================================== +# Example Usage (main.py에서의 활용 예시) +# ========================================== +# +# from crawler_manager import CrawlerManager +# from crawlers import get_crawler +# from config import get_all_plants +# +# def main(): +# # 매니저 초기화 +# manager = CrawlerManager() +# +# # 모든 발전소 순회 +# for plant in get_all_plants(): +# site_id = plant.get('id', '') +# +# if not site_id: +# continue +# +# # 1. 사이트 등록 (최초 1회) +# manager.register_site(site_id) +# +# # 2. 실행 여부 확인 +# if not manager.should_run(site_id): +# print(f" ⏭️ {site_id} 스킵 (최적화 윈도우 외)") +# continue +# +# # 3. 크롤링 실행 +# try: +# crawler_func = get_crawler(plant['type']) +# data = crawler_func(plant) +# +# if data: +# # 4. 실행 기록 +# manager.record_run(site_id) +# +# # 5. (옵션) 패턴 분석 후 최적화 +# # 예: 데이터가 항상 매시 10분에 갱신된다면 +# # manager.update_optimization(site_id, 10) +# +# except Exception as e: +# print(f" ❌ {site_id} 오류: {e}") +# +# if __name__ == "__main__": +# main() +# +# ========================================== +# Cron 예시 (5분마다 실행) +# ========================================== +# */5 * * * * cd /volume1/dev/SolorPower/crawler && \ +# /volume1/dev/SolorPower/crawler/venv/bin/python main.py >> cron.log 2>&1 +# +# - LEARNING 사이트는 5분마다 크롤링 (패턴 학습) +# - OPTIMIZED 사이트는 학습된 시점 직후 10분 윈도우에서만 크롤링 +# - 야간(21시~05시)에는 모든 크롤링 중지 +# ========================================== + + +if __name__ == "__main__": + # 테스트 코드 + manager = CrawlerManager() + + print("=== CrawlerManager 테스트 ===\n") + + # 사이트 등록 + test_sites = ["nrems-01", "nrems-02", "kremc-05"] + for site_id in test_sites: + manager.register_site(site_id) + + # 현재 상태 출력 + print("\n[등록된 사이트]") + for site in manager.get_all_sites(): + print(f" {site['site_id']}: {site['status']} (target: {site['target_minute']}분)") + + # should_run 테스트 + print("\n[should_run 테스트]") + for site_id in test_sites: + result = manager.should_run(site_id) + print(f" {site_id}: {'✅ 
실행' if result else '⏭️ 스킵'}") + + # 최적화 적용 + print("\n[최적화 적용]") + manager.update_optimization("nrems-01", 15) # 매시 15분에 업데이트 + manager.update_optimization("kremc-05", 30) # 매시 30분에 업데이트 + + # 최적화 후 상태 + print("\n[최적화 후 상태]") + for site in manager.get_all_sites(): + print(f" {site['site_id']}: {site['status']} (target: {site['target_minute']}분)") + + # 실행 기록 + manager.record_run("nrems-01") + + print("\n=== 테스트 완료 ===") diff --git a/crawler_structure.md b/crawler_structure.md new file mode 100644 index 0000000..4fcbf0b --- /dev/null +++ b/crawler_structure.md @@ -0,0 +1,110 @@ +# Crawler 시스템 파일 구조 및 역할 정의 + +이 문서는 `crawler` 폴더 내의 각 파일과 모듈의 역할, 기능, 그리고 상호 작용 방식에 대해 자세히 설명합니다. + +## 📁 디렉토리 구조 및 핵심 파일 요약 + +| 파일명 | 분류 | 핵심 역할 | +|---|---|---| +| **main.py** | Core | 크롤러 시스템의 메인 진입점. 전체 수집 프로세스 조율 | +| **config.py** | Config | 발전소 정보, 비밀번호, 시스템 상수 등 설정 관리 | +| **database.py** | Data | Supabase 데이터베이스 연결 및 CRUD 처리 | +| **crawler_manager.py** | Logic | 지능형 스케줄링 관리 (업데이트 패턴 학습 및 최적화) | +| **crawler_gui.py** | UI | 관리자용 대시보드 (윈도우 GUI), 모니터링 및 수동 제어 | +| **daily_summary.py** | Batch | 일일 발전 통계 집계 및 요약 테이블 저장 | +| **fetch_history.py** | Tool | 과거 데이터(Hourly, Daily) 수집 도구 | +| **sync_plants.py** | Tool | 발전소 메타 정보를 DB와 동기화 | +| **verify_data.py** | Test | 수집된 데이터의 무결성 검증 및 테스트 스크립트 | + +--- + +## 📄 파일별 상세 역할 분석 + +### 1. 핵심 시스템 (Core System) + +#### `main.py` +* **역할**: 전체 크롤링 시스템의 오케스트레이터(Orchestrator). +* **주요 기능**: + * `integrated_monitoring()` 함수를 통해 정의된 모든 발전소를 순회합니다. + * `CrawlerManager`를 통해 현재 시점에 실행해야 할 크롤러를 선별합니다. + * 각 발전소 타입에 맞는 크롤러 함수(`crawlers` 패키지)를 동적으로 호출합니다. + * 수집된 실시간 데이터를 콘솔에 출력하고, `database.py`를 통해 DB에 저장합니다. + * 발전량이 0인 경우 등 간단한 이상 감지 로직을 수행합니다. +* **실행 방식**: 스케줄러(Cron 등)에 의해 주기적으로 실행되거나, GUI에서 호출됩니다. `--force` 옵션으로 강제 실행 가능합니다. + +#### `config.py` +* **역할**: 시스템 설정 및 발전소 정보의 단일 진실 공급원(Single Source of Truth). +* **주요 기능**: + * `SYSTEM_CONSTANTS`: 각 크롤러 시스템(NREMS, KREMC 등)의 URL 및 API 엔드포인트 정의. 
+ * `COMPANIES`: 업체 및 산하 발전소들의 계층 구조, 인증 정보(ID/PW), 용량(`capacity_kw`), 고객 ID(`cid`) 등을 Python 딕셔너리 구조로 관리. + * `get_all_plants()`: 계층화된 데이터를 크롤러가 사용하기 쉬운 평탄화(Flat)된 리스트로 변환하여 제공. + * **특이 사항**: 보안이 필요한 인증 정보가 포함되어 있어 관리에 주의가 필요합니다. 1, 2호기와 같이 하나의 계정에서 두 발전소로 분리 처리되는 발전소(`is_split`) 설정도 이곳에서 관리됩니다. + +#### `crawler_manager.py` (Smart Scheduler) +* **역할**: 비효율적인 반복 호출을 줄이고 NAS 리소스를 절약하기 위한 미들웨어. +* **주요 기능**: + * **SQLite 기반 상태 관리**: `crawler_manager.db` 로컬 파일에 각 발전소의 상태 저장. + * **학습 모드(LEARNING)**: 초기에는 자주 실행하며 발전소 서버의 데이터 업데이트 주기 패턴을 학습. + * **최적화 모드(OPTIMIZED)**: 학습된 업데이트 시점(예: 매시 15분) 직후 10분 윈도우(Window)에만 크롤링을 허용. + * 야간(21시~05시) 크롤링 자동 차단 로직 포함. + +### 2. 데이터 관리 (Data Management) + +#### `database.py` +* **역할**: Supabase 클라우드 데이터베이스와의 인터페이스. +* **주요 기능**: + * Supabase 클라이언트 싱글턴 연결 관리. + * `save_to_supabase()`: 실시간 발전 데이터(`solar_logs`) 저장. 일일 통계(`daily_stats`) 단순 Upsert 처리. + * `save_history()`: 과거 내역 저장 시 사용되며, `solar_logs`(Hourly), `daily_stats`(Daily), `monthly_stats`(Monthly) 등 데이터 타입에 따라 적절한 테이블에 저장하고, 월별 통계 자동 갱신 트리거 로직을 포함합니다. + +#### `daily_summary.py` +* **역할**: 수집된 로그 데이터를 기반으로 일일 최종 통계를 확정 짓는 배치 스크립트. +* **주요 기능**: + * 특정 날짜의 `solar_logs`를 모두 조회하여 발전소별 총 발전량, 피크 출력, 발전 시간(이용률)을 계산. + * 계산된 확정 데이터를 `daily_stats` 테이블에 저장. + * 주로 하루가 끝나는 시점이나 다음 날 새벽에 실행하여 데이터 정확도를 보정합니다. + +### 3. 사용자 인터페이스 (User Interface) + +#### `crawler_gui.py` +* **역할**: 윈도우 환경에서 크롤러 상태를 시각적으로 모니터링하고 제어하는 관리자 도구. +* **주요 기능**: + * `tkinter` 기반의 GUI 제공. + * 발전소별 현재 상태(대기, 실행중, 최적화 여부), 마지막 실행 시간 등을 트리 뷰(Tree View)로 표시. + * 개별/전체 크롤링 강제 실행, 히스토리 수집 명령, 학습 모드 리셋 등의 제어 기능 제공. + * 실시간 로그 창을 통해 백그라운드 프로세스(`subprocess`)의 실행 결과를 출력. + +### 4. 도구 및 유틸리티 (Tools & Utilities) + +#### `fetch_history.py` +* **역할**: 누락된 데이터나 초기 구축 시 과거 데이터를 수집하기 위한 스크립트. +* **주요 기능**: + * 특정 발전소 ID를 인자로 받아 과거 데이터를 조회. + * 각 크롤러 모듈(`crawlers/`)에 구현된 `fetch_history_hourly`, `fetch_history_daily` 등을 호출. + * 시간별(Hourly), 일별(Daily) 데이터를 수집하여 DB에 적재. + +#### `sync_plants.py` +* **역할**: 로컬 코드(`config.py`)와 원격 DB(`plants` 테이블) 간의 메타 데이터 동기화. 
+* **주요 기능**: + * 새로운 발전소가 추가되거나 이름/용량이 변경되었을 때, `config.py`의 내용을 DB의 마스터 테이블에 반영(Upsert). + * NREMS 1, 2호기와 같이 논리적으로 분리해야 하는 발전소를 별도 레코드로 DB에 생성. + +#### `verify_data.py` +* **역할**: 크롤링 로직 검증 및 데이터 무결성 테스트. +* **주요 기능**: + * 각 발전소별로 샘플 날짜(과거/현재)를 지정하여 실제 데이터를 가져와 봅니다. + * 시간별, 일별, 월별 합계가 논리적으로 맞는지 검증 포맷을 출력하여 개발자가 확인하기 쉽게 돕습니다. + +### 5. 하위 폴더 + +#### `crawlers/` (폴더) +* **역할**: 실제 사이트별 크롤링 로직이 구현된 모듈들의 집합. +* **구성**: + * `nrems.py`, `kremc.py`, `hyundai.py`, `sun_wms.py`, `cmsolar.py` 등 사이트 타입별로 파일이 존재. + * 각 모듈은 공통적으로 `get_current_status()` (실시간), `fetch_history_*` (과거 내역) 등의 인터페이스를 구현해야 함. + +#### `venv/`, `temp_env/` (폴더) +* **역할**: Python 가상 환경 폴더. 프로젝트 실행에 필요한 라이브러리(`requests`, `pandas`, `supabase` 등)가 설치됨. + +--- +*작성일: 2026-01-28* diff --git a/crawlers/__init__.py b/crawlers/__init__.py new file mode 100644 index 0000000..73053af --- /dev/null +++ b/crawlers/__init__.py @@ -0,0 +1,20 @@ +# crawlers 패키지 초기화 + +from .nrems import fetch_data as fetch_nrems +from .kremc import fetch_data as fetch_kremc +from .sun_wms import fetch_data as fetch_sunwms +from .hyundai import fetch_data as fetch_hyundai +from .cmsolar import fetch_data as fetch_cmsolar + +# 크롤러 타입별 매핑 +CRAWLER_MAP = { + 'nrems': fetch_nrems, + 'kremc': fetch_kremc, + 'sun_wms': fetch_sunwms, + 'hyundai': fetch_hyundai, + 'cmsolar': fetch_cmsolar +} + +def get_crawler(crawler_type): + """크롤러 타입에 해당하는 fetch 함수 반환""" + return CRAWLER_MAP.get(crawler_type) diff --git a/crawlers/base.py b/crawlers/base.py new file mode 100644 index 0000000..bfba9bc --- /dev/null +++ b/crawlers/base.py @@ -0,0 +1,100 @@ +# ========================================== +# crawlers/base.py - 크롤러 공통 유틸리티 +# ========================================== + +import requests + +def safe_float(value): + """ + 안전한 float 변환 + None, 빈 문자열, 콤마 포함 숫자 등을 처리 + """ + if value is None: + return 0.0 + try: + return float(str(value).replace(',', '')) + except (ValueError, TypeError): + return 0.0 + +def create_session(): + """기본 설정된 requests 세션 
생성""" + session = requests.Session() + return session + +def get_default_headers(): + """기본 HTTP 헤더 반환""" + return { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36', + 'Accept': 'application/json, text/plain, */*' + } + +def determine_status(current_kw): + """발전량 기반 상태 결정""" + if current_kw > 0: + return "🟢 정상" + else: + return "💤 대기" + +def format_result(name, kw, today, plant_id, status=None): + """결과 딕셔너리 포맷 통일""" + if status is None: + status = determine_status(kw) + + return { + 'name': name, + 'kw': kw, + 'today': today, + 'id': plant_id, + 'status': status + } + +def validate_data_quality(data_list, value_key='generation_kwh'): + """ + 데이터 품질 검증 + + Returns: + dict: { + 'is_valid': bool, + 'warnings': list, + 'all_zero': bool, + 'duplicate_ratio': float + } + """ + if not data_list or len(data_list) == 0: + return { + 'is_valid': False, + 'warnings': ['데이터 없음'], + 'all_zero': True, + 'duplicate_ratio': 0.0 + } + + warnings = [] + values = [safe_float(item.get(value_key, 0)) for item in data_list] + + # 모두 0인 경우 체크 + all_zero = all(v == 0 for v in values) + if all_zero: + warnings.append('모든 값이 0 - 실제 데이터가 아닐 가능성') + + # 연속 중복 체크 + if len(values) > 1: + duplicates = 0 + for i in range(len(values) - 1): + if values[i] == values[i+1]: + duplicates += 1 + + duplicate_ratio = duplicates / (len(values) - 1) + + if duplicate_ratio > 0.8: + warnings.append(f'연속 중복 비율 {duplicate_ratio*100:.1f}% - 실제 데이터가 아닐 가능성') + else: + duplicate_ratio = 0.0 + + is_valid = not all_zero and duplicate_ratio < 0.8 + + return { + 'is_valid': is_valid, + 'warnings': warnings, + 'all_zero': all_zero, + 'duplicate_ratio': duplicate_ratio + } \ No newline at end of file diff --git a/crawlers/cmsolar.py b/crawlers/cmsolar.py new file mode 100644 index 0000000..a53e19e --- /dev/null +++ b/crawlers/cmsolar.py @@ -0,0 +1,512 @@ +# ========================================== +# crawlers/cmsolar.py - CMSolar 크롤러 (10호기) +# HTML 테이블 파싱 방식 +# 
========================================== + +import requests +import re +from .base import create_session, safe_float + +def fetch_data(plant_info): + """ + CMSolar 발전소 데이터 수집 + """ + plant_id = plant_info.get('id', 'cmsolar-10') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + company_name = plant_info.get('company_name', '함안햇빛발전소') + plant_name = plant_info.get('name', '10호기') + + login_id = auth.get('login_id', '') + login_pw = auth.get('login_pw', '') + site_no = auth.get('site_no', '') + login_url = system.get('login_url', '') + data_url = system.get('data_url', '') + + session = create_session() + + headers = { + 'User-Agent': 'Mozilla/5.0', + 'Content-Type': 'application/x-www-form-urlencoded' + } + + # 로그인 + login_data = { + 'login_id': login_id, + 'login_pw': login_pw, + 'site_no': site_no + } + + try: + res = session.post(login_url, data=login_data, headers=headers) + if res.status_code != 200: + return [] + + # Site selection (Required for idx_ok.php) + base_url = system.get('base_url', 'http://www.cmsolar2.kr') + change_url = f"{base_url}/change.php?site={site_no}" + session.get(change_url, headers=headers) + + except Exception as e: + print(f"❌ {plant_name} 접속 에러: {e}") + return [] + + # 데이터 요청 (JSON Endpoint) + target_url = f"{base_url}/plant/sub/idx_ok.php?mode=getPlant" + + try: + res = session.get(target_url, headers=headers) + + if res.status_code == 200: + # Handle potential encoding issues if needed, though requests usually guesses well + if res.encoding is None: + res.encoding = 'utf-8' + + data = res.json() + + # Parsing logic for [{"plant": {...}}] structure + if isinstance(data, list) and len(data) > 0: + plant_data = data[0].get('plant', {}) + + # Unit Conversion: W -> kW + curr_kw = safe_float(plant_data.get('now', 0)) / 1000.0 + today_kwh = safe_float(plant_data.get('today', 0)) / 1000.0 + + # Status check + is_error = int(plant_data.get('inv_error', 0)) + status = "🟢 정상" if is_error == 0 else "🔴 점검/고장" + + # 
0kW during day is suspicious but night is normal. + # If needed, override status based on time, but sticking to error flag is safer. + if curr_kw == 0 and status == "🟢 정상": + # Optional: Check if night time? + pass + + return [{ + 'id': plant_id, + 'name': f'{company_name} {plant_name}', + 'kw': curr_kw, + 'today': today_kwh, + 'status': status + }] + else: + print(f"❌ {plant_name} 데이터 형식 오류: {data}") + return [] + else: + return [] + + except Exception as e: + print(f"❌ {plant_name} 에러: {e}") + return [] + + +def fetch_history_hourly(plant_info, start_date, end_date): + """ + CMSolar 발전소의 시간대별 과거 데이터 수집 + + 실제 엔드포인트: /plant/sub/report_ok.php (HTML 테이블 응답) + 파라미터: mode=getPowers&type=daily&device=total&start=YYYY-MM-DD&money= + """ + from datetime import datetime, timedelta + + results = [] + plant_id = plant_info.get('id', 'cmsolar-10') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + plant_name = plant_info.get('name', '10호기') + + login_id = auth.get('login_id', '') + login_pw = auth.get('login_pw', '') + site_no = auth.get('site_no', '') + login_url = system.get('login_url', '') + + # 실제 데이터 엔드포인트 + base_url = system.get('api_url', 'http://www.cmsolar2.kr') + data_url = f"{base_url}/plant/sub/report_ok.php" + + session = create_session() + + print(f"\n{'='*60}") + print(f"[CMSolar History] {plant_name} ({start_date} ~ {end_date})") + print(f"{'='*60}") + + headers = { + 'User-Agent': 'Mozilla/5.0', + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8' + } + + login_data = { + 'login_id': login_id, + 'login_pw': login_pw, + 'site_no': site_no + } + + try: + res = session.post(login_url, data=login_data, headers=headers) + if res.status_code == 200: + print(" ✓ Login successful") + else: + print(" ✗ Login failed") + return results + except Exception as e: + print(f" ✗ Login error: {e}") + return results + + # 사이트 선택 (필수!) 
+ try: + change_url = f"{base_url}/change.php?site={site_no}" + session.get(change_url, headers=headers) + print(" ✓ Site selected") + except Exception as e: + print(f" ✗ Site selection error: {e}") + return results + + # 날짜 반복 + current_date = datetime.strptime(start_date, '%Y-%m-%d') + end_dt = datetime.strptime(end_date, '%Y-%m-%d') + + while current_date <= end_dt: + date_str = current_date.strftime('%Y-%m-%d') + + # 실제 확인된 시간별 엔드포인트 (type=daily는 하루 치 시간별 데이터 반환) + params = { + 'mode': 'getPowers', + 'type': 'daily', + 'device': 'total', + 'start': date_str, + 'money': '' + } + + try: + res = session.get(data_url, params=params, headers=headers, timeout=10) + res.encoding = 'utf-8' + + if res.status_code == 200: + # HTML 테이블 파싱 + html = res.text + + # 안의 태그 찾기 + tbody_match = re.search(r'(.*?)', html, re.DOTALL) + if tbody_match: + tbody_content = tbody_match.group(1) + + # 각 파싱 (시간과 발전량) + # 93.0... + tr_pattern = r']*>\s*(\d+)\s*([\d.]+)' + matches = re.findall(tr_pattern, tbody_content) + + if matches: + print(f" ✓ Found {len(matches)} hourly records for {date_str}") + + for hour, kwh in matches: + generation_kwh = safe_float(kwh) + timestamp = f"{date_str} {hour.zfill(2)}:00:00" + + results.append({ + 'plant_id': plant_id, + 'timestamp': timestamp, + 'generation_kwh': generation_kwh, + 'current_kw': 0 + }) + else: + print(f" ⚠ No data for {date_str}") + else: + print(f" ⚠ No tbody found for {date_str}") + else: + print(f" ✗ HTTP {res.status_code}") + + except Exception as e: + print(f" ✗ Error for {date_str}: {e}") + + current_date += timedelta(days=1) + + print(f"\n{'='*60}") + print(f"[Total] Collected {len(results)} hourly records") + print(f"{'='*60}\n") + + return results + + +def fetch_history_daily(plant_info, start_date, end_date): + """ + CMSolar 발전소의 일별 과거 데이터 수집 + + 실제 엔드포인트: /plant/sub/report_ok.php (HTML 테이블 응답) + 파라미터: mode=getPowers&type=month&device=total&start=YYYY-MM-01&money= + """ + from datetime import datetime + from 
dateutil.relativedelta import relativedelta + + results = [] + plant_id = plant_info.get('id', 'cmsolar-10') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + plant_name = plant_info.get('name', '10호기') + + login_id = auth.get('login_id', '') + login_pw = auth.get('login_pw', '') + site_no = auth.get('site_no', '') + login_url = system.get('login_url', '') + + # 실제 데이터 엔드포인트 + base_url = system.get('api_url', 'http://www.cmsolar2.kr') + data_url = f"{base_url}/plant/sub/report_ok.php" + + session = create_session() + + print(f"\n{'='*60}") + print(f"[CMSolar Daily] {plant_name} ({start_date} ~ {end_date})") + print(f"{'='*60}") + + headers = { + 'User-Agent': 'Mozilla/5.0', + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8' + } + + login_data = { + 'login_id': login_id, + 'login_pw': login_pw, + 'site_no': site_no + } + + try: + res = session.post(login_url, data=login_data, headers=headers) + if res.status_code == 200: + print(" ✓ Login successful") + else: + print(" ✗ Login failed") + return results + except Exception as e: + print(f" ✗ Login error: {e}") + return results + + # 사이트 선택 (필수!) 
+ try: + change_url = f"{base_url}/change.php?site={site_no}" + session.get(change_url, headers=headers) + print(" ✓ Site selected") + except Exception as e: + print(f" ✗ Site selection error: {e}") + return results + + # 월 단위로 반복 (type=month는 한 달 치 일별 데이터 반환) + current_date = datetime.strptime(start_date, '%Y-%m-%d') + end_dt = datetime.strptime(end_date, '%Y-%m-%d') + + while current_date <= end_dt: + month_start = current_date.strftime('%Y-%m-01') + year = current_date.year + month = current_date.month + + # 실제 확인된 일별 엔드포인트 (type=month) + params = { + 'mode': 'getPowers', + 'type': 'month', + 'device': 'total', + 'start': month_start, + 'money': '' + } + + try: + res = session.get(data_url, params=params, headers=headers, timeout=10) + res.encoding = 'utf-8' + + if res.status_code == 200: + # HTML 테이블 파싱 + html = res.text + + # 안의 태그 찾기 + tbody_match = re.search(r'(.*?)', html, re.DOTALL) + if tbody_match: + tbody_content = tbody_match.group(1) + + # 각 파싱 (날짜와 발전량) + # 1136.00... + tr_pattern = r']*>\s*(\d+)\s*([\d.,]+)' + matches = re.findall(tr_pattern, tbody_content) + + if matches: + print(f" ✓ Found {len(matches)} daily records for {month_start[:7]}") + + for day, kwh in matches: + # 쉼표 제거 + kwh_clean = kwh.replace(',', '') + generation_kwh = safe_float(kwh_clean) + + date_str = f"{year:04d}-{month:02d}-{int(day):02d}" + + # 날짜 범위 필터링 + if date_str >= start_date and date_str <= end_date: + results.append({ + 'plant_id': plant_id, + 'date': date_str, + 'generation_kwh': generation_kwh, + 'current_kw': 0 + }) + print(f" ✓ {date_str}: {generation_kwh:.2f}kWh") + else: + print(f" ⚠ No tbody found for {month_start[:7]}") + else: + print(f" ✗ HTTP {res.status_code} for {month_start[:7]}") + + except Exception as e: + print(f" ✗ Error for {month_start[:7]}: {e}") + + # 다음 달로 이동 + current_date = (current_date.replace(day=1) + relativedelta(months=1)) + + print(f"[Total] Collected {len(results)} daily records\n") + return results + + +def 
fetch_history_monthly(plant_info, start_month, end_month): + """ + CMSolar 발전소의 월별 과거 데이터 수집 + + 실제 엔드포인트: /plant/sub/report_ok.php (HTML 테이블 응답) + 파라미터: mode=getPowers&type=year&device=total&start=YYYY-01-01&money= + """ + from datetime import datetime + from dateutil.relativedelta import relativedelta + + results = [] + plant_id = plant_info.get('id', 'cmsolar-10') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + plant_name = plant_info.get('name', '10호기') + + # 시작일자 체크 + plant_start_date = plant_info.get('start_date', '2020-08-31') + plant_start_month = plant_start_date[:7] # YYYY-MM + + # 실제 시작 월은 발전소 가동일 이후로 제한 + if start_month < plant_start_month: + actual_start = plant_start_month + print(f" ℹ 발전소 가동일({plant_start_date}) 이후부터 수집: {actual_start}") + else: + actual_start = start_month + + login_id = auth.get('login_id', '') + login_pw = auth.get('login_pw', '') + site_no = auth.get('site_no', '') + login_url = system.get('login_url', '') + + # 실제 데이터 엔드포인트 + base_url = system.get('api_url', 'http://www.cmsolar2.kr') + data_url = f"{base_url}/plant/sub/report_ok.php" + + session = create_session() + + print(f"\n{'='*60}") + print(f"[CMSolar Monthly] {plant_name} ({actual_start} ~ {end_month})") + print(f"{'='*60}") + + headers = { + 'User-Agent': 'Mozilla/5.0', + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8' + } + + login_data = { + 'login_id': login_id, + 'login_pw': login_pw, + 'site_no': site_no + } + + try: + res = session.post(login_url, data=login_data, headers=headers) + if res.status_code == 200: + print(" ✓ Login successful") + else: + print(" ✗ Login failed") + return results + except Exception as e: + print(f" ✗ Login error: {e}") + return results + + # 사이트 선택 (필수!) 
+ try: + change_url = f"{base_url}/change.php?site={site_no}" + session.get(change_url, headers=headers) + print(" ✓ Site selected") + except Exception as e: + print(f" ✗ Site selection error: {e}") + return results + + # 연도별로 반복 (type=year는 한 해 치 월별 데이터 반환) + current_month = datetime.strptime(actual_start, '%Y-%m') + end_month_dt = datetime.strptime(end_month, '%Y-%m') + + processed_years = set() + + while current_month <= end_month_dt: + year = current_month.year + + # 이미 처리한 연도는 스킵 + if year in processed_years: + current_month += relativedelta(months=1) + continue + + processed_years.add(year) + year_start = f"{year}-01-01" + + # 실제 확인된 월별 엔드포인트 (type=year) + params = { + 'mode': 'getPowers', + 'type': 'year', + 'device': 'total', + 'start': year_start, + 'money': '' + } + + try: + res = session.get(data_url, params=params, headers=headers, timeout=10) + res.encoding = 'utf-8' + + if res.status_code == 200: + # HTML 테이블 파싱 + html = res.text + + # 안의 태그 찾기 + tbody_match = re.search(r'(.*?)', html, re.DOTALL) + if tbody_match: + tbody_content = tbody_match.group(1) + + # 각 파싱 (월과 발전량) + # 12,836.00... 
+ tr_pattern = r']*>\s*(\d+)\s*([\d.,]+)' + matches = re.findall(tr_pattern, tbody_content) + + if matches: + year_count = 0 + for month, kwh in matches: + # 쉼표 제거 + kwh_clean = kwh.replace(',', '') + generation_kwh = safe_float(kwh_clean) + + month_str = f"{year:04d}-{int(month):02d}" + + # 월 범위 필터링 + if month_str >= actual_start and month_str <= end_month: + results.append({ + 'plant_id': plant_id, + 'month': month_str, + 'generation_kwh': generation_kwh + }) + print(f" ✓ {month_str}: {generation_kwh:.1f}kWh") + year_count += 1 + + if year_count > 0: + print(f" → Collected {year_count} months from {year}") + else: + print(f" ⚠ No tbody found for year {year}") + else: + print(f" ✗ HTTP {res.status_code} for year {year}") + + except Exception as e: + print(f" ✗ Error for year {year}: {e}") + + # 다음 연도로 이동 + current_month = current_month.replace(year=year+1, month=1) + + print(f"[Total] Collected {len(results)} monthly records\n") + return results diff --git a/crawlers/cmsolar_old.py b/crawlers/cmsolar_old.py new file mode 100644 index 0000000..d10d002 --- /dev/null +++ b/crawlers/cmsolar_old.py @@ -0,0 +1,319 @@ +# ========================================== +# crawlers/cmsolar.py - CMSolar 크롤러 (10호기) +# ========================================== + +import requests +from .base import create_session + +def fetch_data(plant_info): + """ + CMSolar 발전소 데이터 수집 + """ + plant_id = plant_info.get('id', 'cmsolar-10') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + company_name = plant_info.get('company_name', '태양과바람') + plant_name = plant_info.get('name', '10호기') + + login_id = auth.get('login_id', '') + login_pw = auth.get('login_pw', '') + site_no = auth.get('site_no', '') + base_url = system.get('base_url', '') + + session = create_session() + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/143.0.0.0 Safari/537.36', + 'Referer': f'{base_url}/plant/index.php' + } + + # 1. 
로그인 + try: + login_data = {'id': login_id, 'pw': login_pw, 'commit': 'Login'} + session.post(f"{base_url}/login_ok.php", data=login_data, headers=headers) + except: + return [] + + # 2. 사이트 선택 + try: + session.get(f"{base_url}/change.php?site={site_no}", headers=headers) + except: + return [] + + # 3. 데이터 요청 + target_url = f"{base_url}/plant/sub/idx_ok.php?mode=getPlant" + + try: + res = session.get(target_url, headers=headers) + res.encoding = 'utf-8' + + data = res.json() + plant_data = data[0]['plant'] + + # 단위 변환 (W -> kW, Wh -> kWh) + curr_kw = float(plant_data.get('now', 0)) / 1000 + today_kwh = float(plant_data.get('today', 0)) / 1000 + + is_error = int(plant_data.get('inv_error', 0)) + status = "🟢 정상" if is_error == 0 else "🔴 점검/고장" + + print(f" [CMSolar] {plant_name} 수집 완료: {round(curr_kw, 2)} kW") + return [{ + 'id': plant_id, + 'name': f'{company_name} {plant_name}', + 'kw': round(curr_kw, 2), + 'today': round(today_kwh, 2), + 'status': status + }] + + except Exception as e: + print(f"❌ {plant_name} 에러: {e}") + return [] + + +def fetch_history_daily(plant_info, start_date, end_date): + """ + CMSolar 발전소의 일별 과거 데이터 수집 + """ + from datetime import datetime, timedelta + from .base import safe_float + + results = [] + plant_id = plant_info.get('id', 'cmsolar-10') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + plant_name = plant_info.get('name', '10호기') + + login_id = auth.get('login_id', '') + login_pw = auth.get('login_pw', '') + site_no = auth.get('site_no', '') + base_url = system.get('base_url', '') + + session = create_session() + + print(f"\n{'='*60}") + print(f"[CMSolar Daily] {plant_name} ({start_date} ~ {end_date})") + print(f"{'='*60}") + + headers = { + 'User-Agent': 'Mozilla/5.0', + 'Referer': f'{base_url}/plant/index.php' + } + + try: + login_data = {'id': login_id, 'pw': login_pw, 'commit': 'Login'} + session.post(f"{base_url}/login_ok.php", data=login_data, headers=headers) + 
session.get(f"{base_url}/change.php?site={site_no}", headers=headers) + print(" ✓ Login successful") + except Exception as e: + print(f" ✗ Login failed: {e}") + return results + + current_date = datetime.strptime(start_date, '%Y-%m-%d') + end_dt = datetime.strptime(end_date, '%Y-%m-%d') + + while current_date <= end_dt: + date_str = current_date.strftime('%Y-%m-%d') + + # 일별 데이터 엔드포인트 (추정) + daily_url = f"{base_url}/plant/sub/daily_data.php?date={date_str}" + + try: + res = session.get(daily_url, headers=headers, timeout=10) + res.encoding = 'utf-8' + + if res.status_code == 200: + data = res.json() + daily_kwh = safe_float(data.get('today', data.get('daily', 0))) / 1000.0 + + results.append({ + 'plant_id': plant_id, + 'date': date_str, + 'generation_kwh': daily_kwh + }) + print(f" ✓ {date_str}: {daily_kwh}kWh") + + except Exception as e: + print(f" ✗ {date_str}: {e}") + + current_date += timedelta(days=1) + + print(f"[Total] Collected {len(results)} daily records\n") + return results + + +def fetch_history_monthly(plant_info, start_month, end_month): + """ + CMSolar 발전소의 월별 과거 데이터 수집 + """ + from datetime import datetime + from dateutil.relativedelta import relativedelta + from .base import safe_float + + results = [] + plant_id = plant_info.get('id', 'cmsolar-10') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + plant_name = plant_info.get('name', '10호기') + + login_id = auth.get('login_id', '') + login_pw = auth.get('login_pw', '') + site_no = auth.get('site_no', '') + base_url = system.get('base_url', '') + + session = create_session() + + print(f"\n{'='*60}") + print(f"[CMSolar Monthly] {plant_name} ({start_month} ~ {end_month})") + print(f"{'='*60}") + + headers = { + 'User-Agent': 'Mozilla/5.0', + 'Referer': f'{base_url}/plant/index.php' + } + + try: + login_data = {'id': login_id, 'pw': login_pw, 'commit': 'Login'} + session.post(f"{base_url}/login_ok.php", data=login_data, headers=headers) + 
session.get(f"{base_url}/change.php?site={site_no}", headers=headers) + print(" ✓ Login successful") + except Exception as e: + print(f" ✗ Login failed: {e}") + return results + + current_month = datetime.strptime(start_month, '%Y-%m') + end_month_dt = datetime.strptime(end_month, '%Y-%m') + + while current_month <= end_month_dt: + month_str = current_month.strftime('%Y-%m') + + # 월별 데이터 엔드포인트 (추정) + monthly_url = f"{base_url}/plant/sub/monthly_data.php?month={month_str}" + + try: + res = session.get(monthly_url, headers=headers, timeout=10) + res.encoding = 'utf-8' + + if res.status_code == 200: + data = res.json() + monthly_kwh = safe_float(data.get('month', data.get('monthly', 0))) / 1000.0 + + results.append({ + 'plant_id': plant_id, + 'month': month_str, + 'generation_kwh': monthly_kwh + }) + print(f" ✓ {month_str}: {monthly_kwh}kWh") + + except Exception as e: + print(f" ✗ {month_str}: {e}") + + current_month += relativedelta(months=1) + + print(f"[Total] Collected {len(results)} monthly records\n") + return results + + +def fetch_history_hourly(plant_info, start_date, end_date): + """ + CMSolar 발전소의 시간대별 과거 데이터 수집 + + Args: + plant_info: dict, 발전소 정보 + start_date: str, 시작일 (YYYY-MM-DD) + end_date: str, 종료일 (YYYY-MM-DD) + + Returns: + list: 시간대별 데이터 레코드 + """ + from datetime import datetime, timedelta + from .base import safe_float + + results = [] + + # 설정 추출 + plant_id = plant_info.get('id', 'cmsolar-10') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + plant_name = plant_info.get('name', '10호기') + + login_id = auth.get('login_id', '') + login_pw = auth.get('login_pw', '') + site_no = auth.get('site_no', '') + base_url = system.get('base_url', '') + + session = create_session() + + print(f"\n{'='*60}") + print(f"[CMSolar History] {plant_name} ({start_date} ~ {end_date})") + print(f"{'='*60}") + + # 로그인 + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/143.0.0.0 Safari/537.36', + 'Referer': 
f'{base_url}/plant/index.php' + } + + try: + login_data = {'id': login_id, 'pw': login_pw, 'commit': 'Login'} + session.post(f"{base_url}/login_ok.php", data=login_data, headers=headers) + + # 사이트 선택 + session.get(f"{base_url}/change.php?site={site_no}", headers=headers) + print(f" ✓ Login successful") + + except Exception as e: + print(f" ✗ Login failed: {e}") + return results + + # 날짜 범위 반복 + current_date = datetime.strptime(start_date, '%Y-%m-%d') + end_dt = datetime.strptime(end_date, '%Y-%m-%d') + + while current_date <= end_dt: + date_str = current_date.strftime('%Y-%m-%d') + print(f"\n[Processing Date] {date_str}") + + # 시간대별 데이터 엔드포인트 (추정) + hourly_url = f"{base_url}/plant/sub/hourly_data.php?site={site_no}&date={date_str}" + + try: + res = session.get(hourly_url, headers=headers, timeout=10) + res.encoding = 'utf-8' + + if res.status_code == 200: + data = res.json() + hourly_data = data if isinstance(data, list) else data.get('hourly', []) + + if hourly_data and len(hourly_data) > 0: + print(f" ✓ Found {len(hourly_data)} hourly records") + + for item in hourly_data: + hour = str(item.get('hour', item.get('time', '00'))).zfill(2) + generation_wh = safe_float(item.get('energy', item.get('now', 0))) + generation_kwh = generation_wh / 1000.0 if generation_wh > 1000 else generation_wh + current_kw = safe_float(item.get('power', 0)) / 1000.0 + + timestamp = f"{date_str} {hour}:00:00" + + results.append({ + 'plant_id': plant_id, + 'timestamp': timestamp, + 'generation_kwh': generation_kwh, + 'current_kw': current_kw + }) + else: + print(f" ⚠ No hourly data for {date_str}") + else: + print(f" ✗ HTTP {res.status_code}") + + except Exception as e: + print(f" ✗ Error: {e}") + + # 다음 날짜로 + current_date += timedelta(days=1) + + print(f"\n{'='*60}") + print(f"[Total] Collected {len(results)} hourly records") + print(f"{'='*60}\n") + + return results diff --git a/crawlers/cmsolar_old2.py b/crawlers/cmsolar_old2.py new file mode 100644 index 0000000..83dc02c --- /dev/null 
+++ b/crawlers/cmsolar_old2.py @@ -0,0 +1,427 @@ +# ========================================== +# crawlers/cmsolar.py - CMSolar 크롤러 (10호기) +# ========================================== + +import requests +from .base import create_session, safe_float + +def fetch_data(plant_info): + """ + CMSolar 발전소 데이터 수집 + """ + plant_id = plant_info.get('id', 'cmsolar-10') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + company_name = plant_info.get('company_name', '함안햇빛발전소') + plant_name = plant_info.get('name', '10호기') + + login_id = auth.get('payload_id', '') + login_pw = auth.get('payload_pw', '') + site_no = auth.get('site_no', '') + login_url = system.get('login_url', '') + data_url = system.get('data_url', '') + + session = create_session() + + headers = { + 'User-Agent': 'Mozilla/5.0', + 'Content-Type': 'application/x-www-form-urlencoded' + } + + # 로그인 + login_data = { + 'login_id': login_id, + 'login_pw': login_pw, + 'site_no': site_no + } + + try: + res = session.post(login_url, data=login_data, headers=headers) + if res.status_code != 200: + return [] + except Exception as e: + print(f"❌ {plant_name} 접속 에러: {e}") + return [] + + # 데이터 요청 + try: + res = session.get(data_url, headers=headers) + + if res.status_code == 200: + data = res.json() + curr_kw = safe_float(data.get('current', data.get('power', 0))) + today_kwh = safe_float(data.get('today', data.get('generation', 0))) + status = "🟢 정상" if curr_kw > 0 else "💤 대기" + + return [{ + 'id': plant_id, + 'name': f'{company_name} {plant_name}', + 'kw': curr_kw, + 'today': today_kwh, + 'status': status + }] + else: + return [] + + except Exception as e: + print(f"❌ {plant_name} 에러: {e}") + return [] + + +def fetch_history_hourly(plant_info, start_date, end_date): + """ + CMSolar 발전소의 시간대별 과거 데이터 수집 + + 실제 엔드포인트: /plant/sub/report_ok.php + 파라미터: mode=getPowers&type=daily&device=total&start=YYYY-MM-DD&money= + """ + from datetime import datetime, timedelta + + results = [] + plant_id = 
plant_info.get('id', 'cmsolar-10') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + plant_name = plant_info.get('name', '10호기') + + login_id = auth.get('payload_id', '') + login_pw = auth.get('payload_pw', '') + site_no = auth.get('site_no', '') + login_url = system.get('login_url', '') + + # 실제 데이터 엔드포인트 + base_url = system.get('api_url', 'http://www.cmsolar2.kr') + data_url = f"{base_url}/plant/sub/report_ok.php" + + session = create_session() + + print(f"\n{'='*60}") + print(f"[CMSolar History] {plant_name} ({start_date} ~ {end_date})") + print(f"{'='*60}") + + headers = { + 'User-Agent': 'Mozilla/5.0', + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8' + } + + login_data = { + 'login_id': login_id, + 'login_pw': login_pw, + 'site_no': site_no + } + + try: + res = session.post(login_url, data=login_data, headers=headers) + if res.status_code == 200: + print(" ✓ Login successful") + else: + print(" ✗ Login failed") + return results + except Exception as e: + print(f" ✗ Login error: {e}") + return results + + # 사이트 선택 (필수!) 
+ try: + change_url = f"{base_url}/change.php?site={site_no}" + session.get(change_url, headers=headers) + print(" ✓ Site selected") + except Exception as e: + print(f" ✗ Site selection error: {e}") + return results + + # 날짜 반복 + current_date = datetime.strptime(start_date, '%Y-%m-%d') + end_dt = datetime.strptime(end_date, '%Y-%m-%d') + + while current_date <= end_dt: + date_str = current_date.strftime('%Y-%m-%d') + + # 실제 확인된 시간별 엔드포인트 (type=daily는 하루 치 시간별 데이터 반환) + params = { + 'mode': 'getPowers', + 'type': 'daily', + 'device': 'total', + 'start': date_str, + 'money': '' + } + + try: + res = session.get(data_url, params=params, headers=headers, timeout=10) + + if res.status_code == 200: + data = res.json() + # 시간별 데이터 파싱 + hourly_data = data.get('data', []) or data.get('list', []) or data.get('powers', []) + + if isinstance(hourly_data, list) and len(hourly_data) > 0: + print(f" ✓ Found {len(hourly_data)} hourly records for {date_str}") + + for item in hourly_data: + hour = str(item.get('hour', item.get('time', '00'))).zfill(2) + generation_kwh = safe_float(item.get('power', item.get('generation', item.get('kwh', 0)))) + current_kw = safe_float(item.get('kw', 0)) + + timestamp = f"{date_str} {hour}:00:00" + + results.append({ + 'plant_id': plant_id, + 'timestamp': timestamp, + 'generation_kwh': generation_kwh, + 'current_kw': current_kw + }) + else: + print(f" ⚠ No data for {date_str}") + else: + print(f" ✗ HTTP {res.status_code} for {date_str}") + + except Exception as e: + print(f" ✗ Error: {e}") + + current_date += timedelta(days=1) + + print(f"\n{'='*60}") + print(f"[Total] Collected {len(results)} hourly records") + print(f"{'='*60}\n") + + return results + + +def fetch_history_daily(plant_info, start_date, end_date): + """ + CMSolar 발전소의 일별 과거 데이터 수집 + + 실제 엔드포인트: /plant/sub/report_ok.php + 파라미터: mode=getPowers&type=month&device=total&start=YYYY-MM-DD&money= + """ + from datetime import datetime + from dateutil.relativedelta import relativedelta + + 
results = [] + plant_id = plant_info.get('id', 'cmsolar-10') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + plant_name = plant_info.get('name', '10호기') + + login_id = auth.get('payload_id', '') + login_pw = auth.get('payload_pw', '') + site_no = auth.get('site_no', '') + login_url = system.get('login_url', '') + + # 실제 데이터 엔드포인트 + base_url = system.get('api_url', 'http://www.cmsolar2.kr') + data_url = f"{base_url}/plant/sub/report_ok.php" + + session = create_session() + + print(f"\n{'='*60}") + print(f"[CMSolar Daily] {plant_name} ({start_date} ~ {end_date})") + print(f"{'='*60}") + + headers = { + 'User-Agent': 'Mozilla/5.0', + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8' + } + + login_data = { + 'login_id': login_id, + 'login_pw': login_pw, + 'site_no': site_no + } + + try: + res = session.post(login_url, data=login_data, headers=headers) + if res.status_code == 200: + print(" ✓ Login successful") + else: + print(" ✗ Login failed") + return results + except Exception as e: + print(f" ✗ Login error: {e}") + return results + + # 사이트 선택 (필수!) 
def fetch_history_monthly(plant_info, start_month, end_month):
    """
    Collect monthly generation history for a CMSolar plant.

    Logs in, selects the site, then queries ``/plant/sub/report_ok.php``
    once per calendar year with
    ``mode=getPowers&type=year&device=total&start=YYYY-01-01&money=``.
    Each yearly response is expected to carry one entry per month; only
    months inside ``[start_month, end_month]`` are kept.

    Args:
        plant_info: dict with 'id', 'name', 'auth' (payload_id, payload_pw,
            site_no) and 'system' (login_url, api_url).
        start_month: first month to keep, 'YYYY-MM'.
        end_month: last month to keep, 'YYYY-MM'.

    Returns:
        list of ``{'plant_id', 'month', 'generation_kwh'}`` dicts. Empty when
        login or site selection fails.

    Raises:
        ValueError: if start_month / end_month are not 'YYYY-MM'.
    """
    from datetime import datetime

    # Every request gets a timeout so one stalled connection cannot hang the crawler.
    request_timeout = 10

    results = []
    plant_id = plant_info.get('id', 'cmsolar-10')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '10호기')

    login_id = auth.get('payload_id', '')
    login_pw = auth.get('payload_pw', '')
    site_no = auth.get('site_no', '')
    login_url = system.get('login_url', '')

    # Actual data endpoint
    base_url = system.get('api_url', 'http://www.cmsolar2.kr')
    data_url = f"{base_url}/plant/sub/report_ok.php"

    session = create_session()

    print(f"\n{'='*60}")
    print(f"[CMSolar Monthly] {plant_name} ({start_month} ~ {end_month})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }

    login_data = {
        'login_id': login_id,
        'login_pw': login_pw,
        'site_no': site_no
    }

    try:
        res = session.post(login_url, data=login_data, headers=headers,
                           timeout=request_timeout)
        if res.status_code != 200:
            print(" ✗ Login failed")
            return results
        print(" ✓ Login successful")
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    # Site selection is mandatory before the report endpoint returns data.
    try:
        change_url = f"{base_url}/change.php?site={site_no}"
        session.get(change_url, headers=headers, timeout=request_timeout)
        print(" ✓ Site selected")
    except Exception as e:
        print(f" ✗ Site selection error: {e}")
        return results

    first_month = datetime.strptime(start_month + '-01', '%Y-%m-%d')
    last_month = datetime.strptime(end_month + '-01', '%Y-%m-%d')

    # type=year returns a whole year of monthly rows, so one request per
    # calendar year is enough. An inverted range requests nothing.
    if first_month > last_month:
        years = range(0)
    else:
        years = range(first_month.year, last_month.year + 1)

    for year in years:
        params = {
            'mode': 'getPowers',
            'type': 'year',
            'device': 'total',
            'start': f"{year:04d}-01-01",
            'money': ''
        }

        try:
            res = session.get(data_url, params=params, headers=headers,
                              timeout=request_timeout)

            if res.status_code != 200:
                print(f" ✗ HTTP {res.status_code} for {year}")
                continue

            data = res.json()
            # The payload key is not fixed; try the known candidates in order.
            monthly_data = data.get('data', []) or data.get('list', []) or data.get('powers', [])

            if isinstance(monthly_data, list) and len(monthly_data) > 0:
                print(f" ✓ Found {len(monthly_data)} monthly records for {year}")

                for item in monthly_data:
                    month_str = item.get('month', item.get('date', ''))
                    generation_kwh = safe_float(item.get('power', item.get('generation', item.get('kwh', 0))))

                    # Normalise to YYYY-MM
                    if len(month_str) >= 7:
                        month_str = month_str[:7]

                    # Zero-padded YYYY-MM strings sort chronologically.
                    if start_month <= month_str <= end_month:
                        results.append({
                            'plant_id': plant_id,
                            'month': month_str,
                            'generation_kwh': generation_kwh
                        })
                        print(f" ✓ {month_str}: {generation_kwh:.1f}kWh")

        except Exception as e:
            # Best effort: one failing year must not abort the remaining years.
            print(f" ✗ Error: {e}")

    print(f"[Total] Collected {len(results)} monthly records\n")
    return results
def fetch_data(plant_info):
    """
    Collect the current reading of a Hyundai (Hi-Smart 3.0) plant.

    Logs in (trying the plain, ``.json`` and ``.do`` variants of the login
    path until one returns an ``x-auth-token`` header) and then reads
    ``datas.unitedSiteInfo`` from the data endpoint.

    Args:
        plant_info: dict with 'id', 'name', 'company_name', 'auth'
            (user_id, password, site_id) and 'system' (base_url, login_path,
            data_path).

    Returns:
        A one-element list ``[{'id', 'name', 'kw', 'today', 'status'}]``, or
        an empty list when login, the request or parsing fails.
    """
    # Every request gets a timeout so one stalled connection cannot hang the crawler.
    request_timeout = 10

    def _to_number(value):
        # The API may send "1,234.5" strings or plain numbers; accept both.
        return float(str(value).replace(',', ''))

    plant_id = plant_info.get('id', 'hyundai-08')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    company_name = plant_info.get('company_name', '태양과바람')
    plant_name = plant_info.get('name', '8호기')

    user_id = auth.get('user_id', '')
    password = auth.get('password', '')
    site_id = auth.get('site_id', '')

    base_url = system.get('base_url', '')
    login_path = system.get('login_path', '')
    data_path = system.get('data_path', '')

    session = create_session()

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
        'Content-Type': 'application/json;charset=UTF-8',
        'Accept': 'application/json, text/plain, */*',
        'Origin': base_url,
        'Referer': f'{base_url}/',
        'X-ApiVersion': 'v1.0',
        'X-App': 'HIWAY4VUETIFY',
        'X-CallType': '0',
        'X-Channel': 'WEB_PC',
        'X-Lang': 'ko',
        'X-Mid': 'login',
        'X-VName': 'UI'
    }

    # Login: candidate URLs are tried in order until one yields a token.
    login_urls = [
        f"{base_url}{login_path}",
        f"{base_url}{login_path}.json",
        f"{base_url}{login_path}.do"
    ]
    payload = {"user_id": user_id, "password": password}

    login_success = False
    for url in login_urls:
        try:
            res = session.post(url, json=payload, headers=headers,
                               timeout=request_timeout)
        except Exception:
            # A failing candidate is expected; fall through to the next one.
            continue

        if res.status_code != 200:
            continue

        auth_token = res.headers.get('x-auth-token')
        if auth_token:
            headers['x-auth-token'] = auth_token
            print(" [현대] 로그인 성공 & 토큰 확보!")
            login_success = True
            break

    if not login_success:
        print(f"❌ 현대 {plant_name} 로그인 실패")
        return []

    # Data request
    try:
        data_url = f"{base_url}{data_path}"
        params = {'site_id': site_id}

        # The data call uses different channel / menu-id headers than login.
        headers['X-Channel'] = 'WEB_PCWeb'
        headers['X-Mid'] = 'siteWork'

        res = session.get(data_url, params=params, headers=headers,
                          timeout=request_timeout)

        if res.status_code != 200:
            print(f"❌ 현대 데이터 요청 실패 (코드: {res.status_code})")
            return []

        data = res.json()

        if 'datas' in data and 'unitedSiteInfo' in data['datas']:
            info = data['datas']['unitedSiteInfo']

            curr_kw = _to_number(info.get('PVPCS_Pac', '0'))
            today_kwh = _to_number(info.get('PVPCS_Daily_P', '0'))

            print(f" [현대] {plant_name} 데이터: {curr_kw}kW / {today_kwh}kWh")
            return [{
                'id': plant_id,
                'name': f'{company_name} {plant_name}',
                'kw': curr_kw,
                'today': today_kwh,
                'status': "🟢 정상" if curr_kw > 0 else "💤 대기"
            }]

        print("⚠️ 현대 데이터 구조가 다릅니다.")
        return []

    except Exception as e:
        print(f"❌ 현대 파싱 에러: {e}")
        return []
+ """ + from datetime import datetime, timedelta + from .base import safe_float + + results = [] + + # 설정 추출 + plant_id = plant_info.get('id', 'hyundai-08') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + plant_name = plant_info.get('name', '8호기') + + user_id = auth.get('user_id', '') + password = auth.get('password', '') + site_id = auth.get('site_id', '') + + base_url = system.get('base_url', '') + login_path = system.get('login_path', '') + + session = create_session() + + print(f"\n{'='*60}") + print(f"[Hyundai History] {plant_name} ({start_date} ~ {end_date})") + print(f"{'='*60}") + + # 로그인 + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36', + 'Content-Type': 'application/json;charset=UTF-8', + 'Accept': 'application/json, text/plain, */*', + 'Origin': base_url, + 'Referer': f'{base_url}/', + 'X-ApiVersion': 'v1.0', + 'X-App': 'HIWAY4VUETIFY', + 'X-CallType': '0', + 'X-Channel': 'WEB_PC', + 'X-Lang': 'ko', + 'X-Mid': 'login', + 'X-VName': 'UI' + } + + login_urls = [ + f"{base_url}{login_path}", + f"{base_url}{login_path}.json", + f"{base_url}{login_path}.do" + ] + + login_success = False + for url in login_urls: + try: + payload = {"user_id": user_id, "password": password} + res = session.post(url, json=payload, headers=headers) + + if res.status_code == 200: + auth_token = res.headers.get('x-auth-token') + if auth_token: + headers['x-auth-token'] = auth_token + print(f" ✓ Login successful") + login_success = True + break + except Exception: + continue + + if not login_success: + print(f" ✗ Login failed") + return results + + # 날짜 범위 반복 + current_date = datetime.strptime(start_date, '%Y-%m-%d') + end_dt = datetime.strptime(end_date, '%Y-%m-%d') + + headers['X-Mid'] = 'siteWork' + + while current_date <= end_dt: + date_str = current_date.strftime('%Y-%m-%d') + + print(f"\n[Processing Date] {date_str}") + + # getSolraDayWork 엔드포인트 사용 (20분 간격 데이터) + url = 
f"{base_url}/hismart/site/getSolraDayWork" + params = { + 'site_id': site_id, + 'startDate': date_str # YYYY-MM-DD 형식 + } + + try: + res = session.get(url, params=params, headers=headers, timeout=10) + + if res.status_code == 200: + data = res.json() + + # solraDayWork 구조 파싱 + day_work = data.get('datas', {}).get('solraDayWork', {}) + run_data = day_work.get('runData', []) + run_time = day_work.get('runTime', []) + + if run_data and run_time and len(run_data) == len(run_time): + print(f" ✓ Found {len(run_data)} records (20-min intervals)") + + # runData와 runTime을 조합하여 시간대별 데이터 생성 + for i in range(len(run_data)): + time_str = run_time[i] # "14:20" 형식 + generation_kw = safe_float(run_data[i]) # kW 값 + + # timestamp 생성 + timestamp = f"{date_str} {time_str}:00" + + # 20분 간격 데이터를 그대로 저장 (또는 시간 단위로 집계 가능) + results.append({ + 'plant_id': plant_id, + 'timestamp': timestamp, + 'generation_kwh': generation_kw, # 실제로는 순간 kW값 + 'current_kw': generation_kw + }) + + print(f" → Collected {len(run_data)} records") + else: + print(f" ⚠ No data for {date_str}") + else: + print(f" ✗ HTTP {res.status_code}") + + except Exception as e: + print(f" ✗ Error: {e}") + + # 다음 날짜로 + current_date += timedelta(days=1) + + print(f"\n{'='*60}") + print(f"[Total] Collected {len(results)} records") + print(f"{'='*60}\n") + + return results + + +def fetch_history_daily(plant_info, start_date, end_date): + """ + 현대 발전소의 일별 과거 데이터 수집 (월 단위 최적화) + getSolraMonthWork API를 사용하여 한 달치 일별 데이터를 한 번에 가져옴 + """ + from datetime import datetime + from dateutil.relativedelta import relativedelta + from .base import safe_float + import calendar + + results = [] + plant_id = plant_info.get('id', 'hyundai-08') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + plant_name = plant_info.get('name', '8호기') + + user_id = auth.get('user_id', '') + password = auth.get('password', '') + site_id = auth.get('site_id', '') + base_url = system.get('base_url', '') + login_path = 
system.get('login_path', '') + + session = create_session() + + print(f"\n{'='*60}") + print(f"[Hyundai Daily] {plant_name} ({start_date} ~ {end_date}) - Looping by Month") + print(f"{'='*60}") + + # 로그인 + headers = { + 'User-Agent': 'Mozilla/5.0', + 'Content-Type': 'application/json;charset=UTF-8', + 'X-ApiVersion': 'v1.0', + 'X-App': 'HIWAY4VUETIFY', + 'X-Channel': 'WEB_PC', + 'X-Lang': 'ko', + 'X-Mid': 'login', + 'X-VName': 'UI' + } + + login_url = f"{base_url}{login_path}" + payload = {"user_id": user_id, "password": password} + try: + res = session.post(login_url, json=payload, headers=headers) + auth_token = res.headers.get('x-auth-token') + + if not auth_token: + print(" ✗ Login failed") + return results + + headers['x-auth-token'] = auth_token + headers['X-Mid'] = 'siteWork' + print(" ✓ Login successful") + except Exception as e: + print(f" ✗ Login error: {e}") + return results + + # 월 단위 반복 + current_month = datetime.strptime(start_date[:7], '%Y-%m') # YYYY-MM-01 + end_month_dt = datetime.strptime(end_date[:7], '%Y-%m') + + while current_month <= end_month_dt: + month_str = current_month.strftime('%Y-%m') + year = current_month.year + month = current_month.month + + print(f" [Fetching] {month_str} ...", end="", flush=True) + + url = f"{base_url}/hismart/site/getSolraMonthWork" + params = {'site_id': site_id, 'month': month_str} + + try: + res = session.get(url, params=params, headers=headers, timeout=10) + + if res.status_code == 200: + data = res.json() + day_work = data.get('datas', {}).get('solraMonthWork', {}) + run_data = day_work.get('runData', []) + + if run_data: + count = 0 + for day_idx, val in enumerate(run_data): + day = day_idx + 1 + daily_total = safe_float(val) + + # 유효한 날짜인지 확인 (예: 2월 30일 방지) + try: + # 해당 월의 마지막 날짜 확인 + last_day = calendar.monthrange(year, month)[1] + if day > last_day: + continue + + date_str = f"{year}-{month:02d}-{day:02d}" + + # 요청된 날짜 범위 내인지 확인 + if date_str >= start_date and date_str <= end_date: + results.append({ + 
def fetch_history_monthly(plant_info, start_month, end_month):
    """
    Collect monthly generation history for a Hyundai plant.

    For every month in the range, ``getSolraMonthWork`` is queried and the
    per-day values in ``datas.solraMonthWork.runData`` are summed into one
    monthly total.

    Args:
        plant_info: dict with 'id', 'name', 'auth' (user_id, password,
            site_id) and 'system' (base_url, login_path).
        start_month: str, first month (YYYY-MM)
        end_month: str, last month (YYYY-MM)

    Returns:
        list: [{'plant_id': '...', 'month': '2026-01', 'generation_kwh': 12345.6}, ...]
        Empty when login fails.

    Raises:
        ValueError: if start_month / end_month are not 'YYYY-MM'.
    """
    from datetime import datetime
    from dateutil.relativedelta import relativedelta
    from .base import safe_float

    results = []
    plant_id = plant_info.get('id', 'hyundai-08')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '8호기')

    user_id = auth.get('user_id', '')
    password = auth.get('password', '')
    site_id = auth.get('site_id', '')
    base_url = system.get('base_url', '')
    login_path = system.get('login_path', '')

    session = create_session()

    print(f"\n{'='*60}")
    print(f"[Hyundai Monthly] {plant_name} ({start_month} ~ {end_month})")
    print(f"{'='*60}")

    # Login
    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/json;charset=UTF-8',
        'X-ApiVersion': 'v1.0',
        'X-App': 'HIWAY4VUETIFY',
        'X-Channel': 'WEB_PC',
        'X-Lang': 'ko',
        'X-Mid': 'login',
        'X-VName': 'UI'
    }

    login_url = f"{base_url}{login_path}"
    payload = {"user_id": user_id, "password": password}
    # Guarded and time-limited like the daily collector, so a network error
    # yields an empty result instead of crashing the caller.
    try:
        res = session.post(login_url, json=payload, headers=headers, timeout=10)
        auth_token = res.headers.get('x-auth-token')
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    if not auth_token:
        print(" ✗ Login failed")
        return results

    headers['x-auth-token'] = auth_token
    headers['X-Mid'] = 'siteWork'
    print(" ✓ Login successful")

    current_month = datetime.strptime(start_month, '%Y-%m')
    end_month_dt = datetime.strptime(end_month, '%Y-%m')

    # Confirmed monthly endpoint: getSolraMonthWork
    url = f"{base_url}/hismart/site/getSolraMonthWork"

    while current_month <= end_month_dt:
        month_str = current_month.strftime('%Y-%m')

        try:
            params = {
                'site_id': site_id,
                'month': month_str  # YYYY-MM
            }

            # TLS verification is left at the session default, matching every
            # other request this module sends to the same host.
            res = session.get(url, params=params, headers=headers, timeout=10)

            if res.status_code == 200:
                data = res.json()

                # Response shape: datas.solraMonthWork.runData = per-day values
                if 'datas' in data and 'solraMonthWork' in data['datas']:
                    month_data = data['datas']['solraMonthWork']
                    run_data = month_data.get('runData') or []

                    # Entries may arrive as strings or nulls; coerce each one
                    # the same way the daily collector does before summing.
                    monthly_kwh = sum(safe_float(value) for value in run_data) if run_data else 0.0

                    print(f" ✓ {month_str}: {monthly_kwh:.1f}kWh (from {len(run_data)} days)")

                    results.append({
                        'plant_id': plant_id,
                        'month': month_str,
                        'generation_kwh': monthly_kwh
                    })
            else:
                print(f" ✗ {month_str}: HTTP {res.status_code}")

        except Exception as e:
            # Best effort: one failing month must not abort the remaining months.
            print(f" ✗ {month_str}: {e}")

        current_month += relativedelta(months=1)

    print(f"[Total] Collected {len(results)} monthly records\n")
    return results
enso_type = system.get('enso_type', '15001') + + try: + session = create_session() + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36', + 'Content-Type': 'application/json', + 'Accept': 'application/json, text/plain, */*', + 'Origin': 'https://kremc.kr', + 'Referer': 'https://kremc.kr/login' + } + + # 1. 로그인 + login_data = {'userId': user_id, 'password': password} + login_res = session.post(login_url, json=login_data, headers=headers, timeout=10) + + if login_res.status_code != 200: + print(f" ⚠️ KREMC 로그인 실패: {login_res.status_code}") + return [] + + try: + login_json = login_res.json() + + if login_json.get('status') == 200 or login_json.get('code') == 'S001': + data = login_json.get('data') + + if isinstance(data, str) and len(data) > 10: + token = data + elif isinstance(data, dict): + token = data.get('token') or data.get('accessToken') or data.get('jwt') + if not token: + return [] + else: + return [] + else: + print(f" ⚠️ KREMC 로그인 실패: {login_json.get('message', 'Unknown')}") + return [] + except: + return [] + + print(f" [KREMC] 토큰 획득 성공") + + # 2. API 헤더 설정 + api_headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36', + 'Accept': 'application/json', + 'X-Auth-Token': token + } + + installer_id_encoded = urllib.parse.quote(user_id) + + # 3. 실시간 발전량 (kW) + latest_url = f"{api_base}/monitor/installer/gath/latest?installerId={installer_id_encoded}&ensoTypeCode={enso_type}" + latest_res = session.get(latest_url, headers=api_headers, timeout=10) + + current_kw = 0.0 + if latest_res.status_code == 200: + try: + latest_data = latest_res.json() + data = latest_data.get('data', {}) + if isinstance(data, dict): + watts = safe_float(data.get('outpElcpFigr', 0)) + current_kw = watts / 1000.0 if watts > 0 else 0.0 + except: + pass + + # 4. 
def fetch_history_hourly(plant_info, start_date, end_date):
    """
    Collect hourly generation history for a KREMC plant.

    Logs in, then queries ``/stat/userbyuser/meainDataList`` with
    ``dateType=HH`` once per day in the range and turns every entry of
    ``data.userByTimeDataResultDtoList`` into one record.

    Args:
        plant_info: dict with 'id', 'name', 'auth' (user_id, password),
            'system' (login_url, api_base, enso_type) and 'options'
            (cid, cityProvCode, rgnCode, dongCode).
        start_date: str, first day (YYYY-MM-DD)
        end_date: str, last day (YYYY-MM-DD)

    Returns:
        list of ``{'plant_id', 'timestamp', 'generation_kwh', 'current_kw'}``
        dicts with ``timestamp`` formatted 'YYYY-MM-DD HH:00:00'. Whatever
        was collected so far is returned when an error interrupts the run.
    """
    from datetime import datetime, timedelta

    results = []

    # Settings
    plant_id = plant_info.get('id', 'kremc-05')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    options = plant_info.get('options', {})
    plant_name = plant_info.get('name', '5호기')

    user_id = auth.get('user_id', '')
    password = auth.get('password', '')
    login_url = system.get('login_url', '')
    api_base = system.get('api_base', '')
    enso_type = system.get('enso_type', '15001')

    # Extra KREMC query parameters
    cid = options.get('cid', '10013000376')
    city_prov_code = options.get('cityProvCode', '11')
    rgn_code = options.get('rgnCode', '11410')
    dong_code = options.get('dongCode', '1141011700')

    session = create_session()

    print(f"\n{'='*60}")
    print(f"[KREMC History] {plant_name} ({start_date} ~ {end_date})")
    print(f"{'='*60}")

    # Login
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
        'Content-Type': 'application/json',
        'Accept': 'application/json, text/plain, */*',
        'Origin': 'https://kremc.kr',
        'Referer': 'https://kremc.kr/login'
    }

    try:
        login_data = {'userId': user_id, 'password': password}
        login_res = session.post(login_url, json=login_data, headers=headers, timeout=10)

        if login_res.status_code != 200:
            print(f" ✗ Login failed: {login_res.status_code}")
            return results

        login_json = login_res.json()

        if not (login_json.get('status') == 200 or login_json.get('code') == 'S001'):
            print(f" ✗ Login failed: {login_json.get('message', 'Unknown')}")
            return results

        # The token is either the bare 'data' string or a field inside it.
        data = login_json.get('data')
        if isinstance(data, str) and len(data) > 10:
            token = data
        elif isinstance(data, dict):
            token = data.get('token') or data.get('accessToken') or data.get('jwt')
            if not token:
                print(" ✗ Token not found")
                return results
        else:
            print(" ✗ Invalid login data")
            return results

        print(" ✓ Login successful")

        # API headers
        api_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'application/json',
            'X-Auth-Token': token
        }

        # Confirmed hourly endpoint
        hourly_url = f"{api_base}/stat/userbyuser/meainDataList"

        current_date = datetime.strptime(start_date, '%Y-%m-%d')
        end_dt = datetime.strptime(end_date, '%Y-%m-%d')

        while current_date <= end_dt:
            date_str = current_date.strftime('%Y-%m-%d')
            print(f"\n[Processing Date] {date_str}")

            params = {
                'cid': cid,
                'userId': user_id,
                'cityProvCode': city_prov_code,
                'rgnCode': rgn_code,
                'dongCode': dong_code,
                'dateType': 'HH',
                'startGathDtm': date_str,
                'endGathDtm': date_str,
                'ensoTypeCode': enso_type
            }

            try:
                res = session.get(hourly_url, params=params, headers=api_headers, timeout=10)

                if res.status_code == 200:
                    data = res.json()
                    # Response shape: data.userByTimeDataResultDtoList
                    hourly_list = (data.get('data') or {}).get('userByTimeDataResultDtoList', [])

                    if isinstance(hourly_list, list) and len(hourly_list) > 0:
                        print(f" ✓ Found {len(hourly_list)} hourly records")

                        for item in hourly_list:
                            # gathDtm: "00시", "01시", ..., "23시"
                            raw_hour = str(item.get('gathDtm', '')).replace('시', '').strip()

                            # An empty or non-numeric hour would otherwise be
                            # zero-padded to "00" and collide with the real
                            # midnight record, so such rows are skipped.
                            if not raw_hour.isdigit():
                                print(f" ⚠ Skipping record with unparsable hour: {item.get('gathDtm')!r}")
                                continue

                            hour = raw_hour.zfill(2)
                            generation_kwh = safe_float(item.get('dayEnergy', 0))

                            results.append({
                                'plant_id': plant_id,
                                'timestamp': f"{date_str} {hour}:00:00",
                                'generation_kwh': generation_kwh,
                                'current_kw': 0
                            })
                    else:
                        print(f" ⚠ No hourly data for {date_str}")
                else:
                    print(f" ✗ HTTP {res.status_code}")

            except Exception as e:
                # Best effort: one failing day must not abort the remaining days.
                print(f" ✗ Error: {e}")

            # Next day
            current_date += timedelta(days=1)

    except Exception as e:
        print(f" ✗ Overall error: {e}")

    print(f"\n{'='*60}")
    print(f"[Total] Collected {len(results)} hourly records")
    print(f"{'='*60}\n")

    return results
by Month") + print(f"{'='*60}") + + # 로그인 + headers = { + 'User-Agent': 'Mozilla/5.0', + 'Content-Type': 'application/json', + 'Accept': 'application/json' + } + + try: + login_data = {'userId': user_id, 'password': password} + login_res = session.post(login_url, json=login_data, headers=headers, timeout=10) + + if login_res.status_code != 200: + print(" ✗ Login failed") + return results + + login_json = login_res.json() + data = login_json.get('data') + token = data if isinstance(data, str) else data.get('token') if isinstance(data, dict) else None + + if not token: + print(" ✗ Token not found") + return results + + print(" ✓ Login successful") + + api_headers = { + 'User-Agent': 'Mozilla/5.0', + 'Accept': 'application/json', + 'X-Auth-Token': token + } + + # 월 단위 루프 적용 + current_date_dt = datetime.strptime(start_date, '%Y-%m-%d') + end_date_dt = datetime.strptime(end_date, '%Y-%m-%d') + + # 시작하는 달의 첫날로 맞춤 (단, 실제 요청 시에는 start_date 고려) + # 하지만 그냥 편의상 start_date가 속한 달부터 end_date가 속한 달까지 루프 돌면서 + # API 요청 범위를 정교하게 자르는 게 좋음. 
def fetch_history_monthly(plant_info, start_month, end_month):
    """
    Collect monthly generation history for a KREMC plant.

    KREMC does not support ``dateType=MM`` (it answers HTTP 500), so daily
    data (``dateType=DD``) is fetched one calendar month at a time and summed
    into a monthly total.

    Months before the plant's ``start_date`` are skipped.

    Args:
        plant_info: dict with 'id', 'name', 'start_date', 'auth' (user_id,
            password), 'system' (login_url, api_base, enso_type) and
            'options' (cid, cityProvCode, rgnCode, dongCode).
        start_month: str, first month (YYYY-MM)
        end_month: str, last month (YYYY-MM)

    Returns:
        list of ``{'plant_id', 'month', 'generation_kwh'}`` dicts. Months
        without daily rows are omitted. Empty when login fails.

    Raises:
        ValueError: if the effective start month or end_month is not 'YYYY-MM'.
    """
    import calendar
    from datetime import datetime
    from dateutil.relativedelta import relativedelta

    results = []
    plant_id = plant_info.get('id', 'kremc-05')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    options = plant_info.get('options', {})
    plant_name = plant_info.get('name', '5호기')

    # Never request months before the plant went into operation.
    plant_start_date = plant_info.get('start_date', '2018-06-28')
    plant_start_month = plant_start_date[:7]  # YYYY-MM

    if start_month < plant_start_month:
        actual_start = plant_start_month
        print(f" ℹ 발전소 가동일({plant_start_date}) 이후부터 수집: {actual_start}")
    else:
        actual_start = start_month

    user_id = auth.get('user_id', '')
    password = auth.get('password', '')
    login_url = system.get('login_url', '')
    api_base = system.get('api_base', '')
    enso_type = system.get('enso_type', '15001')

    # Extra KREMC query parameters
    cid = options.get('cid', '10013000376')
    city_prov_code = options.get('cityProvCode', '11')
    rgn_code = options.get('rgnCode', '11410')
    dong_code = options.get('dongCode', '1141011700')

    session = create_session()

    print(f"\n{'='*60}")
    print(f"[KREMC Monthly] {plant_name} ({actual_start} ~ {end_month})")
    print(f"{'='*60}")

    # Login
    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }

    # Guarded like the daily and hourly collectors, so a network or JSON
    # error yields an empty result instead of crashing the caller.
    try:
        login_data = {'userId': user_id, 'password': password}
        login_res = session.post(login_url, json=login_data, headers=headers, timeout=10)

        if login_res.status_code != 200:
            print(" ✗ Login failed")
            return results

        data = login_res.json().get('data')
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    # The token is either the bare 'data' string or a field inside it.
    if isinstance(data, str):
        token = data
    elif isinstance(data, dict):
        token = data.get('token') or data.get('accessToken') or data.get('jwt')
    else:
        token = None

    if not token:
        print(" ✗ Token not found")
        return results

    print(" ✓ Login successful")

    api_headers = {
        'User-Agent': 'Mozilla/5.0',
        'Accept': 'application/json',
        'X-Auth-Token': token
    }

    daily_url = f"{api_base}/stat/userbyuser/meainDataList"

    current_month = datetime.strptime(actual_start, '%Y-%m')
    end_month_dt = datetime.strptime(end_month, '%Y-%m')

    while current_month <= end_month_dt:
        month_str = current_month.strftime('%Y-%m')

        # First and last calendar day of the month being fetched.
        first_day = current_month.strftime('%Y-%m-01')
        days_in_month = calendar.monthrange(current_month.year, current_month.month)[1]
        last_day = current_month.replace(day=days_in_month).strftime('%Y-%m-%d')

        try:
            # Fetch daily rows (dateType=DD) and sum them.
            params = {
                'cid': cid,
                'userId': user_id,
                'cityProvCode': city_prov_code,
                'rgnCode': rgn_code,
                'dongCode': dong_code,
                'dateType': 'DD',
                'startGathDtm': first_day,
                'endGathDtm': last_day,
                'ensoTypeCode': enso_type
            }

            res = session.get(daily_url, params=params, headers=api_headers, timeout=10)

            if res.status_code == 200:
                data = res.json()
                # Response shape: data.userByTimeDataResultDtoList
                daily_list = (data.get('data') or {}).get('userByTimeDataResultDtoList', [])

                if isinstance(daily_list, list) and len(daily_list) > 0:
                    monthly_total = sum(safe_float(item.get('dayEnergy', 0)) for item in daily_list)

                    results.append({
                        'plant_id': plant_id,
                        'month': month_str,
                        'generation_kwh': monthly_total
                    })
                    print(f" ✓ {month_str}: {monthly_total:.1f}kWh (from {len(daily_list)} days)")
            else:
                print(f" ✗ HTTP {res.status_code} for {month_str}")

        except Exception as e:
            # Best effort: one failing month must not abort the remaining months.
            print(f" ✗ Error for {month_str}: {e}")

        # Next month
        current_month += relativedelta(months=1)

    print(f"[Total] Collected {len(results)} monthly records\n")
    return results
def _get_inverter_sums(session, pscode, system_config):
    """Return today's per-inverter generation for the split plant (units 1 and 2).

    Posts a ``getList`` / ``dispType=time`` query for today's date to the
    ``inv_proc_url`` endpoint and reads ``sumPw`` from the ``invlist`` entries
    whose ``tidx`` is ``1`` and ``2``.

    Args:
        session: HTTP session exposing ``post(url, data=, headers=, timeout=)``.
        pscode: Plant code sent in the form body and in the Referer header.
        system_config: Mapping that provides ``inv_proc_url``.

    Returns:
        tuple[float, float]: ``(inverter1_kwh, inverter2_kwh)``. ``(0.0, 0.0)``
        is returned on any failure or when neither inverter reports a
        positive total, which makes the caller fall back to a ratio split.
    """
    inv_proc_url = system_config.get('inv_proc_url', '')
    if not inv_proc_url:
        # Nothing to query; report it explicitly instead of relying on the
        # HTTP library to reject an empty URL.
        print(" ⚠️ inv_proc_url 설정 없음")
        return 0.0, 0.0

    # Read the clock once so the day and month fields can never disagree
    # when the call happens right at a month boundary.
    now = datetime.now()
    today_str = now.strftime('%Y-%m-%d')
    month_str = now.strftime('%Y-%m')

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Referer': f'http://www.nrems.co.kr/v2/local/comp/cp_inv_time.php?pscode={pscode}'
    }
    data = {
        'act': 'getList',
        's_day': today_str,
        's_date': today_str,
        'e_date': today_str,
        's_mon': month_str,
        'e_mon': month_str,
        'pscode': pscode,
        'dispType': 'time'
    }

    try:
        res = session.post(inv_proc_url, data=data, headers=headers, timeout=10)
        if res.status_code != 200:
            print(f" ⚠️ API 응답 오류: {res.status_code}")
            return 0.0, 0.0

        try:
            json_data = res.json()
        except ValueError:
            # ValueError is the common base of the JSON decode errors that
            # the stdlib json module and simplejson can raise.
            print(" ⚠️ JSON 파싱 실패")
            return 0.0, 0.0

        invlist = json_data.get('invlist') if isinstance(json_data, dict) else None

        sums = {'1': 0.0, '2': 0.0}
        for inv in invlist or []:
            if not isinstance(inv, dict):
                continue
            tidx = str(inv.get('tidx', ''))
            if tidx in sums:
                sums[tidx] = safe_float(inv.get('sumPw'))

        sum_1, sum_2 = sums['1'], sums['2']
        if sum_1 > 0 or sum_2 > 0:
            print(f" [API] 인버터 합계 추출 성공! (인버터1: {sum_1} kWh / 인버터2: {sum_2} kWh)")
            return sum_1, sum_2

        print(" ⚠️ API 응답에 인버터 데이터 없음")
        return 0.0, 0.0

    except Exception as e:
        print(f" [에러] {e}")
        return 0.0, 0.0


def fetch_data(plant_info):
    """Collect the current output and today's generation for an NREMS plant.

    Args:
        plant_info: Plant configuration with the keys
            ``id`` (DB identifier; absent for the split plant), ``name``,
            ``auth`` (``{'pscode': ...}``), ``options``
            (``{'is_split': bool}``), ``system``
            (``{'api_url': ..., 'inv_proc_url': ...}``) and ``company_name``.

    Returns:
        list[dict]: Rows of the form
        ``{'id', 'name', 'kw', 'today', 'status'}``.
        A split plant yields two rows with the fixed IDs ``nrems-01`` and
        ``nrems-02``; any other plant yields one row.
        On a request/processing error a non-split plant yields a single
        error row, while a split plant yields no rows.
        A non-200 response or an unparsable body yields an empty list.

    Raises:
        KeyError: If ``plant_info`` has no ``auth`` or ``options`` entry.
    """
    results = []

    plant_id = plant_info.get('id', '')
    pscode = plant_info['auth'].get('pscode', '')
    is_split = plant_info['options'].get('is_split', False)
    system_config = plant_info.get('system', {})
    company_name = plant_info.get('company_name', '태양과바람')
    plant_name = plant_info.get('name', '')

    def failure_rows():
        # Only a non-split plant has a single configured ID to attach an
        # error row to; the split plant reports nothing on failure.
        if is_split:
            return []
        return [{
            'id': plant_id,
            'name': f'{company_name} {plant_name}',
            'kw': 0.0,
            'today': 0.0,
            'status': '🔴 오류'
        }]

    api_url = system_config.get('api_url', '')
    if not api_url:
        # Same outcome as a failed request, without opening a session first.
        print(f"❌ NREMS {plant_name} 오류: api_url 설정 없음")
        return failure_rows()

    session = create_session()
    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }

    try:
        res = session.post(api_url, data={'pscode': pscode}, headers=headers, timeout=10)
        if res.status_code != 200:
            print(f" ⚠️ API 응답 오류: {res.status_code}")
            return results

        try:
            data = res.json()
        except ValueError:
            print(" ⚠️ JSON 파싱 실패")
            return results

        if not isinstance(data, dict):
            raise ValueError(f"unexpected response type: {type(data).__name__}")

        # Locate this plant's status entry (case-insensitive match on either
        # the pscode or the WMU_CODE field).
        ps_list = data.get('ps_status')
        target_data = None
        if isinstance(ps_list, list):
            wanted = pscode.lower()
            candidates = [item for item in ps_list if isinstance(item, dict)]
            for item in candidates:
                codes = (item.get('pscode'), item.get('WMU_CODE'))
                if any(code and str(code).lower() == wanted for code in codes):
                    target_data = item
                    break

            if target_data is None and candidates:
                print(f" ⚠️ Target pscode '{pscode}' not found in response. Available: {[i.get('pscode') for i in candidates]}")
                target_data = candidates[0]  # Fallback to the first entry
                print(f" ⚠️ Using fallback: {target_data.get('pscode')}")
        elif isinstance(ps_list, dict):
            target_data = ps_list
        if not target_data:
            target_data = {}

        total_kw = safe_float(target_data.get('KW'))
        total_today = safe_float(target_data.get('TDayKWH'))

        # A null or non-list ivt_value is treated as "no inverter rows".
        inverters = data.get('ivt_value')
        if not isinstance(inverters, list):
            inverters = []

        if is_split:
            # Case A: units 1 and 2 share one pscode and are reported separately.
            real_sum_1, real_sum_2 = _get_inverter_sums(session, pscode, system_config)

            kw_1 = safe_float(inverters[0].get('KW')) if len(inverters) >= 1 else 0.0
            kw_2 = safe_float(inverters[1].get('KW')) if len(inverters) >= 2 else 0.0

            if (real_sum_1 + real_sum_2) > 0:
                today_1 = real_sum_1
                today_2 = real_sum_2
            else:
                # Backup: split the plant total by the inverters' current
                # power ratio, or evenly when neither is producing.
                print(" ⚠️ 백업 로직(비율) 가동")
                inv_total = kw_1 + kw_2
                if inv_total > 0:
                    today_1 = total_today * (kw_1 / inv_total)
                    today_2 = total_today * (kw_2 / inv_total)
                else:
                    today_1 = total_today / 2
                    today_2 = total_today / 2

            results.append({
                'id': 'nrems-01',  # fixed ID for unit 1
                'name': f'{company_name} 1호기',
                'kw': kw_1,
                'today': round(today_1, 2),
                'status': "🟢 정상" if kw_1 > 0 else "💤 대기"
            })
            results.append({
                'id': 'nrems-02',  # fixed ID for unit 2
                'name': f'{company_name} 2호기',
                'kw': kw_2,
                'today': round(today_2, 2),
                'status': "🟢 정상" if kw_2 > 0 else "💤 대기"
            })
        else:
            # Case B: single-unit plant, ID comes from the configuration.
            results.append({
                'id': plant_id,
                'name': f'{company_name} {plant_name}',
                'kw': total_kw,
                'today': total_today,
                'status': "🟢 정상" if total_kw > 0 else "💤 대기"
            })

    except Exception as e:
        print(f"❌ NREMS {plant_name} 오류: {e}")
        results.extend(failure_rows())

    return results
'type': 'nrems', + 'auth': {'pscode': '...'}, + 'options': {'is_split': True/False}, + 'system': {'api_url': '...', 'inv_proc_url': '...'}, + 'company_name': '...' + } + start_date: str, 시작일 (YYYY-MM-DD) + end_date: str, 종료일 (YYYY-MM-DD) + + Returns: + list: [{ + 'plant_id': 'nrems-03', + 'timestamp': '2026-01-15 14:00:00', + 'generation_kwh': 123.5, + 'current_kw': 15.2 + }, ...] + """ + results = [] + + # 설정 추출 + plant_id = plant_info.get('id', '') + pscode = plant_info['auth'].get('pscode', '') + is_split = plant_info['options'].get('is_split', False) + plant_name = plant_info.get('name', '') + + # 날짜 범위 생성 + from datetime import datetime, timedelta + current_date = datetime.strptime(start_date, '%Y-%m-%d') + end_dt = datetime.strptime(end_date, '%Y-%m-%d') + + session = create_session() + + print(f"\n{'='*60}") + print(f"[NREMS Hourly] {plant_name} ({start_date} ~ {end_date})") + print(f"{'='*60}") + + while current_date <= end_dt: + date_str = current_date.strftime('%Y-%m-%d') + print(f"\n[Processing Date] {date_str}") + + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36', + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + 'X-Requested-With': 'XMLHttpRequest' + } + + try: + if is_split: + # 1,2호기: cp_inv_proc.php with dispType=time + url = 'http://www.nrems.co.kr/v2/local/proc/cp_inv_proc.php' + headers['Referer'] = f'http://www.nrems.co.kr/v2/local/comp/cp_inv_time.php?pscode={pscode}' + payload = { + 'act': 'getList', + 's_day': date_str, + 's_date': date_str, + 'e_date': date_str, + 's_mon': date_str[:7], + 'e_mon': date_str[:7], + 'pscode': pscode, + 'dispType': 'time' + } + else: + # 3,4,9호기: pl_time_proc.php with act=empty + url = 'http://www.nrems.co.kr/v2/local/proc/pl_time_proc.php' + headers['Referer'] = f'http://www.nrems.co.kr/v2/local/plant/pl_time.php?pscode={pscode}' + payload = { + 'act': 'empty', + 's_date': date_str, + 'pscode': pscode + } + + response = 
def fetch_history_daily(plant_info, start_date, end_date):
    """Collect daily generation history for an NREMS plant, one request per month.

    The requested range is cut into calendar-month chunks. Each chunk is
    fetched with a single POST and every returned day becomes one record
    (two records per day for the split plant: ``nrems-01`` and ``nrems-02``).

    Args:
        plant_info: Plant configuration; reads ``id``, ``name``,
            ``auth['pscode']``, ``options['is_split']`` and, for the split
            plant, ``system['inv_proc_url']``.
        start_date: First day to collect, ``YYYY-MM-DD``.
        end_date: Last day to collect (inclusive), ``YYYY-MM-DD``.

    Returns:
        list[dict]: ``{'plant_id': str, 'date': 'YYYY-MM-DD',
        'generation_kwh': float}`` records. Empty when the range is reversed
        or nothing could be fetched.

    Raises:
        ValueError: If either date is not in ``YYYY-MM-DD`` format.
        KeyError: If ``plant_info`` has no ``auth`` or ``options`` entry.
    """
    from datetime import datetime, timedelta
    import calendar

    results = []

    plant_id = plant_info.get('id', '')
    pscode = plant_info['auth'].get('pscode', '')
    is_split = plant_info['options'].get('is_split', False)
    plant_name = plant_info.get('name', '')
    system_config = plant_info.get('system') or {}

    # Validate the range before doing any network setup.
    start_dt = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')

    print(f"\n{'='*60}")
    print(f"[NREMS Daily] {plant_name} ({start_date} ~ {end_date}) - Looping by Month")
    print(f"{'='*60}")

    if start_dt > end_dt:
        print(f"\n[Total] Collected {len(results)} daily records\n")
        return results

    session = create_session()

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'X-Requested-With': 'XMLHttpRequest'
    }
    if is_split:
        # Units 1/2: cp_inv_proc.php with dispType=day. Prefer the configured
        # endpoint and fall back to the known URL when it is not configured.
        url = system_config.get('inv_proc_url') or 'http://www.nrems.co.kr/v2/local/proc/cp_inv_proc.php'
        headers['Referer'] = f'http://www.nrems.co.kr/v2/local/comp/cp_inv_day.php?pscode={pscode}'
        records_key = 'pwdata'
    else:
        # Units 3/4/9: pl_day_proc.php with an s_day/e_day range.
        url = 'http://www.nrems.co.kr/v2/local/proc/pl_day_proc.php'
        headers['Referer'] = f'http://www.nrems.co.kr/v2/local/plant/pl_day.php?pscode={pscode}'
        records_key = 'pdata'

    def normalize_date(date_raw, year):
        # The API may return "MM-DD"; prefix the year of the chunk being
        # fetched (a chunk never crosses a year boundary). Anything else is
        # passed through unchanged.
        text = str(date_raw).strip()
        parts = text.split('-')
        if len(parts) == 2 and len(parts[0]) <= 2:
            mm, dd = parts
            return f"{year}-{mm.zfill(2)}-{dd.zfill(2)}"
        return text

    current_dt = start_dt
    while current_dt <= end_dt:
        # The chunk ends at the month's last day, clamped to the overall end.
        last_day_of_month = calendar.monthrange(current_dt.year, current_dt.month)[1]
        chunk_end_dt = min(current_dt.replace(day=last_day_of_month), end_dt)

        s_date_str = current_dt.strftime('%Y-%m-%d')
        e_date_str = chunk_end_dt.strftime('%Y-%m-%d')

        print(f" [Fetching] {s_date_str} ~ {e_date_str} ...", end="", flush=True)

        if is_split:
            payload = {
                'act': 'getList',
                's_day': s_date_str,
                's_date': s_date_str,
                'e_date': e_date_str,
                's_mon': s_date_str[:7],
                'e_mon': e_date_str[:7],
                'pscode': pscode,
                'dispType': 'day'
            }
        else:
            payload = {
                'act': 'empty',
                's_day': s_date_str,
                'e_day': e_date_str,
                'pscode': pscode
            }

        try:
            response = session.post(url, data=payload, headers=headers, timeout=15)

            if response.status_code != 200:
                print(f" HTTP {response.status_code}")
            else:
                try:
                    data = response.json()
                except ValueError as json_err:
                    print(f" JSON Error: {json_err}")
                    data = None

                if data is not None:
                    daily_records = data.get(records_key) if isinstance(data, dict) else None

                    if daily_records:
                        count = 0
                        for day_data in daily_records:
                            # Skip malformed rows instead of losing the
                            # whole month to one bad entry.
                            if not isinstance(day_data, dict):
                                continue
                            date_raw = day_data.get('DATE', '')
                            if not date_raw:
                                continue

                            clean_date = normalize_date(date_raw, current_dt.year)

                            if is_split:
                                inv1_gen = safe_float(day_data.get('PW1', 0))
                                inv2_gen = safe_float(day_data.get('PW2', 0))
                                results.append({'plant_id': 'nrems-01', 'date': clean_date, 'generation_kwh': inv1_gen})
                                results.append({'plant_id': 'nrems-02', 'date': clean_date, 'generation_kwh': inv2_gen})
                            else:
                                generation_kwh = safe_float(day_data.get('INV', 0))
                                results.append({'plant_id': plant_id, 'date': clean_date, 'generation_kwh': generation_kwh})
                            count += 1

                        print(f" OK ({count} days)")
                    else:
                        print(" No data")

        except Exception as e:
            print(f" Error: {e}")

        # Continue with the day after this chunk: the 1st of the next month,
        # or a date past end_dt, which ends the loop.
        current_dt = chunk_end_dt + timedelta(days=1)

    print(f"\n[Total] Collected {len(results)} daily records\n")
    return results
'month': '2026-01', 'generation_kwh': 12345.6}, ...] + """ + from datetime import datetime + + results = [] + + # 설정 추출 + plant_id = plant_info.get('id', '') + pscode = plant_info['auth'].get('pscode', '') + is_split = plant_info['options'].get('is_split', False) + plant_name = plant_info.get('name', '') + + session = create_session() + + print(f"\n{'='*60}") + print(f"[NREMS Monthly] {plant_name} ({start_month} ~ {end_month})") + print(f"{'='*60}") + + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36', + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + 'X-Requested-With': 'XMLHttpRequest' + } + + try: + if is_split: + # 1,2호기: cp_inv_proc.php with dispType=mon + url = 'http://www.nrems.co.kr/v2/local/proc/cp_inv_proc.php' + headers['Referer'] = f'http://www.nrems.co.kr/v2/local/comp/cp_inv_month.php?pscode={pscode}' + payload = { + 'act': 'getList', + 's_day': f"{end_month}-01", + 's_date': f"{start_month}-01", + 'e_date': f"{end_month}-01", + 's_mon': start_month, + 'e_mon': end_month, + 'pscode': pscode, + 'dispType': 'mon' + } + else: + # 3,4,9호기: pl_month_proc.php with s_date/e_date (YYYY-MM) + url = 'http://www.nrems.co.kr/v2/local/proc/pl_month_proc.php' + headers['Referer'] = f'http://www.nrems.co.kr/v2/local/plant/pl_month.php?pscode={pscode}' + payload = { + 'act': 'empty', + 's_date': start_month, + 'e_date': end_month, + 'pscode': pscode + } + + response = session.post(url, data=payload, headers=headers, timeout=15) + + if response.status_code == 200: + data = response.json() + + # 데이터 구조 확인 + if is_split: + # 1,2호기: pwdata 키 사용 + monthly_records = data.get('pwdata', []) + else: + # 3,4,9호기: pdata 키 사용 + monthly_records = data.get('pdata', []) + + if monthly_records: + print(f" ✓ Found {len(monthly_records)} monthly records") + + for month_data in monthly_records: + # 월 추출 + month_str = month_data.get('DATE', '') + if not month_str: + continue + + if is_split: + # 1,2호기: PW1, PW2 
def fetch_data(plant_info):
    """Collect the current output and today's generation for a Sun-WMS plant.

    Logs in with the configured credentials, then reads the ``cur_power``
    and ``today_power`` field values out of the real-time data page.

    Args:
        plant_info: Plant configuration; reads ``id``, ``name``,
            ``company_name``, ``auth`` (``payload_id`` / ``payload_pw``) and
            ``system`` (``login_url`` / ``data_url``).

    Returns:
        list[dict]: A single ``{'id', 'name', 'kw', 'today', 'status'}`` row,
        or an empty list when the configuration is incomplete, the login or
        data request fails, or a value cannot be parsed.
    """
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    company_name = plant_info.get('company_name', '태양과바람')
    plant_name = plant_info.get('name', '6호기')

    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')
    data_url = system.get('data_url', '')

    if not login_url or not data_url:
        # Same outcome as a failed request, without opening a session first.
        print(f"❌ {plant_name} 접속 에러: login_url/data_url 설정 없음")
        return []

    session = create_session()

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
        'Referer': 'http://tb6.sun-wms.com/public/main/login.php',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }

    # 1. Login
    login_data = {
        'act': 'loginChk',
        'user_id': payload_id,
        'user_pass': payload_pw
    }

    try:
        # A timeout keeps one unresponsive site from hanging the whole run.
        res = session.post(login_url, data=login_data, headers=headers, timeout=10)
        if res.status_code != 200:
            print(f"❌ {plant_name} 접속 에러: HTTP {res.status_code}")
            return []
    except Exception as e:
        print(f"❌ {plant_name} 접속 에러: {e}")
        return []

    # 2. Data request
    try:
        # The millisecond timestamp acts as a cache-buster query parameter.
        timestamp = int(time.time() * 1000)
        res = session.get(f"{data_url}?time={timestamp}", headers=headers, timeout=10)
        if res.status_code != 200:
            # Without this check an error page would contain neither field
            # and be reported as a plant on standby with 0 kWh.
            print(f"❌ {plant_name} 에러: HTTP {res.status_code}")
            return []
        res.encoding = 'euc-kr'

        content = res.text

        def read_value(field_id):
            # Value attribute of the element with the given id; 0.0 when the
            # element is not present in the page.
            pattern = r"id=['\"]" + re.escape(field_id) + r"['\"].*?value=['\"]([^'\"]+)['\"]"
            match = re.search(pattern, content)
            if not match:
                return 0.0
            # Tolerate thousands separators such as "1,234.5".
            return float(match.group(1).replace(',', '').strip())

        curr_kw = read_value('cur_power')
        today_kwh = read_value('today_power')

        status = "🟢 정상" if curr_kw > 0 else "💤 대기"

        return [{
            'id': plant_id,
            'name': f'{company_name} {plant_name}',
            'kw': curr_kw,
            'today': today_kwh,
            'status': status
        }]

    except Exception as e:
        print(f"❌ {plant_name} 에러: {e}")
        return []
def fetch_history_daily(plant_info, start_date, end_date):
    """Collect daily generation history for a Sun-WMS plant, one request per month.

    Logs in, then queries the statistics page
    (``tab01=0&tab02=2&tab03=2&tord=2&s_day=...&e_day=...``) once per
    calendar-month chunk and parses the date/kWh pairs out of the HTML
    table body.

    Args:
        plant_info: Plant configuration; reads ``id``, ``name``, ``auth``
            (``payload_id`` / ``payload_pw``) and ``system`` (``login_url``,
            ``base_url``, optional ``statics_url``).
        start_date: First day to collect, ``YYYY-MM-DD``.
        end_date: Last day to collect (inclusive), ``YYYY-MM-DD``.

    Returns:
        list[dict]: ``{'plant_id', 'date', 'generation_kwh', 'current_kw'}``
        records (``current_kw`` is always 0). Empty when the range is
        reversed, the login fails, or no rows could be parsed.

    Raises:
        ValueError: If either date is not in ``YYYY-MM-DD`` format.
    """
    from datetime import datetime, timedelta
    import calendar

    results = []
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '6호기')

    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')
    base_url = system.get('base_url', '')
    statics_url = system.get('statics_url', f"{base_url}/public/statics/statics.php")

    # Validate the range before logging in.
    start_dt = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')

    print(f"\n{'='*60}")
    print(f"[Sun-WMS Daily] {plant_name} ({start_date} ~ {end_date}) - Looping by Month")
    print(f"{'='*60}")

    if start_dt > end_dt:
        print(f"\n[Total] Collected {len(results)} daily records\n")
        return results

    session = create_session()

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }

    # Login
    try:
        login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw}
        res = session.post(login_url, data=login_data, headers=headers, timeout=10)
        if res.status_code == 200:
            print(" ✓ Login successful")
        else:
            print(" ✗ Login failed")
            return results
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    loop_start = start_dt
    while loop_start <= end_dt:
        # The chunk ends at the month's last day, clamped to the overall end.
        last_day_of_month = calendar.monthrange(loop_start.year, loop_start.month)[1]
        loop_end = min(loop_start.replace(day=last_day_of_month), end_dt)

        s_str = loop_start.strftime('%Y-%m-%d')
        e_str = loop_end.strftime('%Y-%m-%d')

        print(f" [Fetching] {s_str} ~ {e_str} ...", end="", flush=True)

        params = {
            'tab01': '0',
            'tab02': '2',
            'tab03': '2',
            'tord': '2',
            's_day': s_str,
            'e_day': e_str
        }

        try:
            res = session.get(statics_url, params=params, headers=headers, timeout=15)
            res.encoding = 'euc-kr'

            if res.status_code == 200:
                html = res.text
                # Capture the table body; the pattern needs explicit tag
                # delimiters, otherwise the lazy group matches "" at index 0.
                tbody_match = re.search(r'<tbody[^>]*>(.*?)</tbody>', html, re.DOTALL | re.IGNORECASE)

                if tbody_match:
                    # Replace tags with spaces so the cells become
                    # whitespace-separated text, whatever attributes the
                    # row/cell tags carry.
                    # NOTE(review): assumes the first cell after the date is
                    # the kWh column — confirm against a real response.
                    tbody_text = re.sub(r'<[^>]+>', ' ', tbody_match.group(1))
                    tr_pattern = r'\s*(\d{4}-\d{2}-\d{2})\s*([\d.]+)'
                    matches = re.findall(tr_pattern, tbody_text)

                    if matches:
                        for date_str, kwh in matches:
                            results.append({
                                'plant_id': plant_id,
                                'date': date_str,
                                'generation_kwh': safe_float(kwh),
                                'current_kw': 0
                            })
                        print(f" OK ({len(matches)} days)")
                    else:
                        print(" No data")
                else:
                    print(" No tbody")
            else:
                print(f" HTTP {res.status_code}")

        except Exception as e:
            print(f" Error: {e}")

        # Next chunk starts the day after this one ended.
        loop_start = loop_end + timedelta(days=1)

    print(f"\n[Total] Collected {len(results)} daily records\n")
    return results
session = create_session() + + print(f"\n{'='*60}") + print(f"[Sun-WMS Monthly] {plant_name} ({actual_start} ~ {end_month})") + print(f"{'='*60}") + + headers = { + 'User-Agent': 'Mozilla/5.0', + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8' + } + + login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw} + + try: + res = session.post(login_url, data=login_data, headers=headers) + if res.status_code == 200: + print(" ✓ Login successful") + else: + print(" ✗ Login failed") + return results + except Exception as e: + print(f" ✗ Login error: {e}") + return results + + # 월 단위로 반복 + current_month = datetime.strptime(actual_start, '%Y-%m') + end_month_dt = datetime.strptime(end_month, '%Y-%m') + + while current_month <= end_month_dt: + month_str = current_month.strftime('%Y-%m') + + # 해당 월의 시작일과 마지막일 + first_day = current_month.strftime('%Y-%m-01') + if current_month.month == 12: + last_day = current_month.replace(day=31).strftime('%Y-%m-%d') + else: + next_month = current_month + relativedelta(months=1) + last_day = (next_month - relativedelta(days=1)).strftime('%Y-%m-%d') + + # 일별 엔드포인트로 한 달치 데이터 수집해서 합산 + params = { + 'tab01': '0', + 'tab02': '2', + 'tab03': '2', + 'tord': '2', + 's_day': first_day, + 'e_day': last_day + } + + try: + res = session.get(statics_url, params=params, headers=headers, timeout=10) + res.encoding = 'euc-kr' + + if res.status_code == 200: + # HTML 테이블 파싱 + html = res.text + + # 안의 태그 찾기 + tbody_match = re.search(r'(.*?)', html, re.DOTALL) + if tbody_match: + tbody_content = tbody_match.group(1) + + # 각 파싱 (날짜와 발전량) + tr_pattern = r'\s*(\d{4}-\d{2}-\d{2})\s*([\d.]+)' + matches = re.findall(tr_pattern, tbody_content) + + if matches: + # 일별 데이터를 합산 + monthly_total = sum([safe_float(kwh) for _, kwh in matches]) + + results.append({ + 'plant_id': plant_id, + 'month': month_str, + 'generation_kwh': monthly_total + }) + print(f" ✓ {month_str}: {monthly_total:.1f}kWh (from {len(matches)} days)") + else: + 
print(f" ⚠ No data for {month_str}") + except Exception as e: + print(f" ✗ Error for {month_str}: {e}") + + # 다음 달로 + current_month += relativedelta(months=1) + + print(f"[Total] Collected {len(results)} monthly records\n") + return results diff --git a/crawlers/sun_wms.py.backup b/crawlers/sun_wms.py.backup new file mode 100644 index 0000000..ad76f41 --- /dev/null +++ b/crawlers/sun_wms.py.backup @@ -0,0 +1,343 @@ +# ========================================== +# crawlers/sun_wms.py - Sun-WMS 크롤러 (6호기) +# ========================================== + +import requests +import re +import time +from .base import create_session + +def fetch_data(plant_info): + """ + Sun-WMS 발전소 데이터 수집 + """ + plant_id = plant_info.get('id', 'sunwms-06') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + company_name = plant_info.get('company_name', '태양과바람') + plant_name = plant_info.get('name', '6호기') + + payload_id = auth.get('payload_id', '') + payload_pw = auth.get('payload_pw', '') + login_url = system.get('login_url', '') + data_url = system.get('data_url', '') + + session = create_session() + + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36', + 'Referer': 'http://tb6.sun-wms.com/public/main/login.php', + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8' + } + + # 1. 로그인 + login_data = { + 'act': 'loginChk', + 'user_id': payload_id, + 'user_pass': payload_pw + } + + try: + res = session.post(login_url, data=login_data, headers=headers) + if res.status_code != 200: + return [] + except Exception as e: + print(f"❌ {plant_name} 접속 에러: {e}") + return [] + + # 2. 
데이터 요청 + try: + timestamp = int(time.time() * 1000) + res = session.get(f"{data_url}?time={timestamp}", headers=headers) + res.encoding = 'euc-kr' + + content = res.text + + match_kw = re.search(r"id=['\"]cur_power['\"].*?value=['\"]([^'\"]+)['\"]", content) + curr_kw = float(match_kw.group(1)) if match_kw else 0.0 + + match_today = re.search(r"id=['\"]today_power['\"].*?value=['\"]([^'\"]+)['\"]", content) + today_kwh = float(match_today.group(1)) if match_today else 0.0 + + status = "🟢 정상" if curr_kw > 0 else "💤 대기" + + return [{ + 'id': plant_id, + 'name': f'{company_name} {plant_name}', + 'kw': curr_kw, + 'today': today_kwh, + 'status': status + }] + + except Exception as e: + print(f"❌ {plant_name} 에러: {e}") + return [] + + +def fetch_history_daily(plant_info, start_date, end_date): + """ + Sun-WMS 발전소의 일별 과거 데이터 수집 + """ + from datetime import datetime, timedelta + from .base import safe_float + import time + + results = [] + plant_id = plant_info.get('id', 'sunwms-06') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + plant_name = plant_info.get('name', '6호기') + + payload_id = auth.get('payload_id', '') + payload_pw = auth.get('payload_pw', '') + login_url = system.get('login_url', '') + base_url = system.get('base_url', '') + + session = create_session() + + print(f"\n{'='*60}") + print(f"[Sun-WMS Daily] {plant_name} ({start_date} ~ {end_date})") + print(f"{'='*60}") + + headers = { + 'User-Agent': 'Mozilla/5.0', + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8' + } + + login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw} + + try: + res = session.post(login_url, data=login_data, headers=headers) + if res.status_code == 200: + print(" ✓ Login successful") + else: + print(" ✗ Login failed") + return results + except Exception as e: + print(f" ✗ Login error: {e}") + return results + + current_date = datetime.strptime(start_date, '%Y-%m-%d') + end_dt = datetime.strptime(end_date, 
'%Y-%m-%d') + + while current_date <= end_dt: + date_str = current_date.strftime('%Y-%m-%d') + + # 일별 데이터 엔드포인트 (추정) + daily_url = f"{base_url}/public/chart/getDailyData.php?date={date_str}" + + try: + timestamp = int(time.time() * 1000) + res = session.get(f"{daily_url}&time={timestamp}", headers=headers, timeout=10) + res.encoding = 'euc-kr' + + if res.status_code == 200: + data = res.json() + daily_kwh = safe_float(data.get('daily', data.get('today', 0))) + + results.append({ + 'plant_id': plant_id, + 'date': date_str, + 'generation_kwh': daily_kwh + }) + print(f" ✓ {date_str}: {daily_kwh}kWh") + + except Exception as e: + print(f" ✗ {date_str}: {e}") + + current_date += timedelta(days=1) + + print(f"[Total] Collected {len(results)} daily records\n") + return results + + +def fetch_history_monthly(plant_info, start_month, end_month): + """ + Sun-WMS 발전소의 월별 과거 데이터 수집 + """ + from datetime import datetime + from dateutil.relativedelta import relativedelta + from .base import safe_float + import time + + results = [] + plant_id = plant_info.get('id', 'sunwms-06') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + plant_name = plant_info.get('name', '6호기') + + payload_id = auth.get('payload_id', '') + payload_pw = auth.get('payload_pw', '') + login_url = system.get('login_url', '') + base_url = system.get('base_url', '') + + session = create_session() + + print(f"\n{'='*60}") + print(f"[Sun-WMS Monthly] {plant_name} ({start_month} ~ {end_month})") + print(f"{'='*60}") + + headers = { + 'User-Agent': 'Mozilla/5.0', + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8' + } + + login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw} + + try: + res = session.post(login_url, data=login_data, headers=headers) + if res.status_code == 200: + print(" ✓ Login successful") + else: + print(" ✗ Login failed") + return results + except Exception as e: + print(f" ✗ Login error: {e}") + return results + + 
current_month = datetime.strptime(start_month, '%Y-%m') + end_month_dt = datetime.strptime(end_month, '%Y-%m') + + while current_month <= end_month_dt: + month_str = current_month.strftime('%Y-%m') + + # 월별 데이터 엔드포인트 (추정) + monthly_url = f"{base_url}/public/chart/getMonthlyData.php?month={month_str}" + + try: + timestamp = int(time.time() * 1000) + res = session.get(f"{monthly_url}&time={timestamp}", headers=headers, timeout=10) + res.encoding = 'euc-kr' + + if res.status_code == 200: + data = res.json() + monthly_kwh = safe_float(data.get('monthly', data.get('month', 0))) + + results.append({ + 'plant_id': plant_id, + 'month': month_str, + 'generation_kwh': monthly_kwh + }) + print(f" ✓ {month_str}: {monthly_kwh}kWh") + + except Exception as e: + print(f" ✗ {month_str}: {e}") + + current_month += relativedelta(months=1) + + print(f"[Total] Collected {len(results)} monthly records\n") + return results + + +def fetch_history_hourly(plant_info, start_date, end_date): + """ + Sun-WMS 발전소의 시간대별 과거 데이터 수집 + + Args: + plant_info: dict, 발전소 정보 + start_date: str, 시작일 (YYYY-MM-DD) + end_date: str, 종료일 (YYYY-MM-DD) + + Returns: + list: 시간대별 데이터 레코드 + """ + from datetime import datetime, timedelta + from .base import safe_float + import time + + results = [] + + # 설정 추출 + plant_id = plant_info.get('id', 'sunwms-06') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + plant_name = plant_info.get('name', '6호기') + + payload_id = auth.get('payload_id', '') + payload_pw = auth.get('payload_pw', '') + login_url = system.get('login_url', '') + base_url = system.get('base_url', '') + + session = create_session() + + print(f"\n{'='*60}") + print(f"[Sun-WMS History] {plant_name} ({start_date} ~ {end_date})") + print(f"{'='*60}") + + # 로그인 + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36', + 'Referer': 'http://tb6.sun-wms.com/public/main/login.php', + 'Content-Type': 'application/x-www-form-urlencoded; 
charset=UTF-8' + } + + login_data = { + 'act': 'loginChk', + 'user_id': payload_id, + 'user_pass': payload_pw + } + + try: + res = session.post(login_url, data=login_data, headers=headers) + if res.status_code != 200: + print(f" ✗ Login failed") + return results + + print(f" ✓ Login successful") + + except Exception as e: + print(f" ✗ Login error: {e}") + return results + + # 날짜 범위 반복 + current_date = datetime.strptime(start_date, '%Y-%m-%d') + end_dt = datetime.strptime(end_date, '%Y-%m-%d') + + while current_date <= end_dt: + date_str = current_date.strftime('%Y-%m-%d') + print(f"\n[Processing Date] {date_str}") + + # 시간대별 데이터 엔드포인트 (추정) + hourly_url = f"{base_url}/public/chart/getHourlyData.php?date={date_str}" + + try: + timestamp = int(time.time() * 1000) + res = session.get(f"{hourly_url}&time={timestamp}", headers=headers, timeout=10) + res.encoding = 'euc-kr' + + if res.status_code == 200: + data = res.json() + hourly_data = data if isinstance(data, list) else data.get('hourly', []) + + if hourly_data and len(hourly_data) > 0: + print(f" ✓ Found {len(hourly_data)} hourly records") + + for item in hourly_data: + hour = str(item.get('hour', item.get('time', '00'))).zfill(2) + generation_kwh = safe_float(item.get('power', item.get('kwh', 0))) + current_kw = safe_float(item.get('kw', 0)) + + timestamp = f"{date_str} {hour}:00:00" + + results.append({ + 'plant_id': plant_id, + 'timestamp': timestamp, + 'generation_kwh': generation_kwh, + 'current_kw': current_kw + }) + else: + print(f" ⚠ No hourly data for {date_str}") + else: + print(f" ✗ HTTP {res.status_code}") + + except Exception as e: + print(f" ✗ Error: {e}") + + # 다음 날짜로 + current_date += timedelta(days=1) + + print(f"\n{'='*60}") + print(f"[Total] Collected {len(results)} hourly records") + print(f"{'='*60}\n") + + return results diff --git a/crawlers/sun_wms_json.py b/crawlers/sun_wms_json.py new file mode 100644 index 0000000..037b523 --- /dev/null +++ b/crawlers/sun_wms_json.py @@ -0,0 +1,359 @@ +# 
========================================== +# crawlers/sun_wms.py - Sun-WMS 크롤러 (6호기) +# ========================================== + +import requests +import re +import time +from .base import create_session, safe_float + +def fetch_data(plant_info): + """ + Sun-WMS 발전소 데이터 수집 + """ + plant_id = plant_info.get('id', 'sunwms-06') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + company_name = plant_info.get('company_name', '태양과바람') + plant_name = plant_info.get('name', '6호기') + + payload_id = auth.get('payload_id', '') + payload_pw = auth.get('payload_pw', '') + login_url = system.get('login_url', '') + data_url = system.get('data_url', '') + + session = create_session() + + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36', + 'Referer': 'http://tb6.sun-wms.com/public/main/login.php', + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8' + } + + # 1. 로그인 + login_data = { + 'act': 'loginChk', + 'user_id': payload_id, + 'user_pass': payload_pw + } + + try: + res = session.post(login_url, data=login_data, headers=headers) + if res.status_code != 200: + return [] + except Exception as e: + print(f"❌ {plant_name} 접속 에러: {e}") + return [] + + # 2. 
데이터 요청 + try: + timestamp = int(time.time() * 1000) + res = session.get(f"{data_url}?time={timestamp}", headers=headers) + res.encoding = 'euc-kr' + + content = res.text + + match_kw = re.search(r"id=['\"]cur_power['\"].*?value=['\"]([^'\"]+)['\"]", content) + curr_kw = float(match_kw.group(1)) if match_kw else 0.0 + + match_today = re.search(r"id=['\"]today_power['\"].*?value=['\"]([^'\"]+)['\"]", content) + today_kwh = float(match_today.group(1)) if match_today else 0.0 + + status = "🟢 정상" if curr_kw > 0 else "💤 대기" + + return [{ + 'id': plant_id, + 'name': f'{company_name} {plant_name}', + 'kw': curr_kw, + 'today': today_kwh, + 'status': status + }] + + except Exception as e: + print(f"❌ {plant_name} 에러: {e}") + return [] + + +def fetch_history_hourly(plant_info, start_date, end_date): + """ + Sun-WMS 발전소의 시간대별 과거 데이터 수집 + + 실제 엔드포인트: /public/statics/statics.php + 파라미터: tab01=0&tab02=1&tab03=2&tord=1&s_day=YYYY-MM-DD + """ + from datetime import datetime, timedelta + + results = [] + plant_id = plant_info.get('id', 'sunwms-06') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + plant_name = plant_info.get('name', '6호기') + + payload_id = auth.get('payload_id', '') + payload_pw = auth.get('payload_pw', '') + login_url = system.get('login_url', '') + + # base_url 추출 + base_url = system.get('base_url', '') + if not base_url and 'http' in login_url: + base_url = login_url.split('/public')[0] + + statics_url = system.get('statics_url', f"{base_url}/public/statics/statics.php") + + session = create_session() + + print(f"\n{'='*60}") + print(f"[Sun-WMS History] {plant_name} ({start_date} ~ {end_date})") + print(f"{'='*60}") + + headers = { + 'User-Agent': 'Mozilla/5.0', + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8' + } + + login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw} + + try: + res = session.post(login_url, data=login_data, headers=headers) + if res.status_code == 200: + print(" ✓ 
Login successful") + else: + print(" ✗ Login failed") + return results + except Exception as e: + print(f" ✗ Login error: {e}") + return results + + # 날짜 반복 + current_date = datetime.strptime(start_date, '%Y-%m-%d') + end_dt = datetime.strptime(end_date, '%Y-%m-%d') + + while current_date <= end_dt: + date_str = current_date.strftime('%Y-%m-%d') + + # 실제 확인된 시간별 엔드포인트 + params = { + 'tab01': '0', + 'tab02': '1', + 'tab03': '2', + 'tord': '1', + 's_day': date_str + } + + try: + res = session.get(statics_url, params=params, headers=headers, timeout=10) + res.encoding = 'euc-kr' + + if res.status_code == 200: + data = res.json() + # 시간별 데이터 파싱 + hourly_data = data.get('data', []) or data.get('list', []) + + if isinstance(hourly_data, list) and len(hourly_data) > 0: + print(f" ✓ Found {len(hourly_data)} hourly records") + + for item in hourly_data: + hour = str(item.get('hour', item.get('time', '00'))).zfill(2) + generation_kwh = safe_float(item.get('generation', item.get('kwh', 0))) + current_kw = safe_float(item.get('power', item.get('kw', 0))) + + timestamp = f"{date_str} {hour}:00:00" + + results.append({ + 'plant_id': plant_id, + 'timestamp': timestamp, + 'generation_kwh': generation_kwh, + 'current_kw': current_kw + }) + else: + print(f" ⚠ No data for {date_str}") + else: + print(f" ✗ HTTP {res.status_code}") + + except Exception as e: + print(f" ✗ Error: {e}") + + current_date += timedelta(days=1) + + print(f"\n{'='*60}") + print(f"[Total] Collected {len(results)} hourly records") + print(f"{'='*60}\n") + + return results + + +def fetch_history_daily(plant_info, start_date, end_date): + """ + Sun-WMS 발전소의 일별 과거 데이터 수집 + + 실제 엔드포인트: /public/statics/statics.php + 파라미터: tab01=0&tab02=2&tab03=2&tord=2&s_day=YYYY-MM-DD&e_day=YYYY-MM-DD + """ + from datetime import datetime + + results = [] + plant_id = plant_info.get('id', 'sunwms-06') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + plant_name = plant_info.get('name', '6호기') + + 
payload_id = auth.get('payload_id', '') + payload_pw = auth.get('payload_pw', '') + login_url = system.get('login_url', '') + + # base_url 추출 + base_url = system.get('base_url', '') + if not base_url and 'http' in login_url: + base_url = login_url.split('/public')[0] + + statics_url = system.get('statics_url', f"{base_url}/public/statics/statics.php") + + session = create_session() + + print(f"\n{'='*60}") + print(f"[Sun-WMS Daily] {plant_name} ({start_date} ~ {end_date})") + print(f"{'='*60}") + + headers = { + 'User-Agent': 'Mozilla/5.0', + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8' + } + + login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw} + + try: + res = session.post(login_url, data=login_data, headers=headers) + if res.status_code == 200: + print(" ✓ Login successful") + else: + print(" ✗ Login failed") + return results + except Exception as e: + print(f" ✗ Login error: {e}") + return results + + # 실제 확인된 일별 엔드포인트 + params = { + 'tab01': '0', + 'tab02': '2', + 'tab03': '2', + 'tord': '2', + 's_day': start_date, + 'e_day': end_date + } + + try: + res = session.get(statics_url, params=params, headers=headers, timeout=10) + res.encoding = 'euc-kr' + + if res.status_code == 200: + data = res.json() + # 일별 데이터 파싱 + daily_data = data.get('data', []) or data.get('list', []) + + if isinstance(daily_data, list) and len(daily_data) > 0: + for item in daily_data: + date_str = item.get('date', item.get('day', '')) + generation_kwh = safe_float(item.get('generation', item.get('kwh', 0))) + current_kw = safe_float(item.get('power', item.get('kw', 0))) + + results.append({ + 'plant_id': plant_id, + 'date': date_str, + 'generation_kwh': generation_kwh, + 'current_kw': current_kw + }) + print(f" ✓ {date_str}: {generation_kwh:.2f}kWh") + except Exception as e: + print(f" ✗ Error: {e}") + + print(f"[Total] Collected {len(results)} daily records\n") + return results + + +def fetch_history_monthly(plant_info, start_month, 
end_month): + """ + Sun-WMS 발전소의 월별 과거 데이터 수집 + + 실제 엔드포인트: /public/statics/statics.php + 파라미터: tab01=0&tab02=3&tab03=2&tord=3&s_day=YYYY-MM&e_day=YYYY-MM + """ + from datetime import datetime + from dateutil.relativedelta import relativedelta + + results = [] + plant_id = plant_info.get('id', 'sunwms-06') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + plant_name = plant_info.get('name', '6호기') + + payload_id = auth.get('payload_id', '') + payload_pw = auth.get('payload_pw', '') + login_url = system.get('login_url', '') + + # base_url 추출 + base_url = system.get('base_url', '') + if not base_url and 'http' in login_url: + base_url = login_url.split('/public')[0] + + statics_url = system.get('statics_url', f"{base_url}/public/statics/statics.php") + + session = create_session() + + print(f"\n{'='*60}") + print(f"[Sun-WMS Monthly] {plant_name} ({start_month} ~ {end_month})") + print(f"{'='*60}") + + headers = { + 'User-Agent': 'Mozilla/5.0', + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8' + } + + login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw} + + try: + res = session.post(login_url, data=login_data, headers=headers) + if res.status_code == 200: + print(" ✓ Login successful") + else: + print(" ✗ Login failed") + return results + except Exception as e: + print(f" ✗ Login error: {e}") + return results + + # 실제 확인된 월별 엔드포인트 + params = { + 'tab01': '0', + 'tab02': '3', + 'tab03': '2', + 'tord': '3', + 's_day': start_month, + 'e_day': end_month + } + + try: + res = session.get(statics_url, params=params, headers=headers, timeout=10) + res.encoding = 'euc-kr' + + if res.status_code == 200: + data = res.json() + # 월별 데이터 파싱 + monthly_data = data.get('data', []) or data.get('list', []) + + if isinstance(monthly_data, list) and len(monthly_data) > 0: + for item in monthly_data: + month_str = item.get('month', item.get('date', '')) + generation_kwh = safe_float(item.get('generation', 
item.get('kwh', item.get('monthTotal', 0)))) + + results.append({ + 'plant_id': plant_id, + 'month': month_str[:7] if len(month_str) >= 7 else month_str, + 'generation_kwh': generation_kwh + }) + print(f" ✓ {month_str[:7]}: {generation_kwh:.1f}kWh") + except Exception as e: + print(f" ✗ Error: {e}") + + print(f"[Total] Collected {len(results)} monthly records\n") + return results diff --git a/crawlers/sun_wms_old.py b/crawlers/sun_wms_old.py new file mode 100644 index 0000000..ad76f41 --- /dev/null +++ b/crawlers/sun_wms_old.py @@ -0,0 +1,343 @@ +# ========================================== +# crawlers/sun_wms.py - Sun-WMS 크롤러 (6호기) +# ========================================== + +import requests +import re +import time +from .base import create_session + +def fetch_data(plant_info): + """ + Sun-WMS 발전소 데이터 수집 + """ + plant_id = plant_info.get('id', 'sunwms-06') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + company_name = plant_info.get('company_name', '태양과바람') + plant_name = plant_info.get('name', '6호기') + + payload_id = auth.get('payload_id', '') + payload_pw = auth.get('payload_pw', '') + login_url = system.get('login_url', '') + data_url = system.get('data_url', '') + + session = create_session() + + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36', + 'Referer': 'http://tb6.sun-wms.com/public/main/login.php', + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8' + } + + # 1. 로그인 + login_data = { + 'act': 'loginChk', + 'user_id': payload_id, + 'user_pass': payload_pw + } + + try: + res = session.post(login_url, data=login_data, headers=headers) + if res.status_code != 200: + return [] + except Exception as e: + print(f"❌ {plant_name} 접속 에러: {e}") + return [] + + # 2. 
데이터 요청 + try: + timestamp = int(time.time() * 1000) + res = session.get(f"{data_url}?time={timestamp}", headers=headers) + res.encoding = 'euc-kr' + + content = res.text + + match_kw = re.search(r"id=['\"]cur_power['\"].*?value=['\"]([^'\"]+)['\"]", content) + curr_kw = float(match_kw.group(1)) if match_kw else 0.0 + + match_today = re.search(r"id=['\"]today_power['\"].*?value=['\"]([^'\"]+)['\"]", content) + today_kwh = float(match_today.group(1)) if match_today else 0.0 + + status = "🟢 정상" if curr_kw > 0 else "💤 대기" + + return [{ + 'id': plant_id, + 'name': f'{company_name} {plant_name}', + 'kw': curr_kw, + 'today': today_kwh, + 'status': status + }] + + except Exception as e: + print(f"❌ {plant_name} 에러: {e}") + return [] + + +def fetch_history_daily(plant_info, start_date, end_date): + """ + Sun-WMS 발전소의 일별 과거 데이터 수집 + """ + from datetime import datetime, timedelta + from .base import safe_float + import time + + results = [] + plant_id = plant_info.get('id', 'sunwms-06') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + plant_name = plant_info.get('name', '6호기') + + payload_id = auth.get('payload_id', '') + payload_pw = auth.get('payload_pw', '') + login_url = system.get('login_url', '') + base_url = system.get('base_url', '') + + session = create_session() + + print(f"\n{'='*60}") + print(f"[Sun-WMS Daily] {plant_name} ({start_date} ~ {end_date})") + print(f"{'='*60}") + + headers = { + 'User-Agent': 'Mozilla/5.0', + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8' + } + + login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw} + + try: + res = session.post(login_url, data=login_data, headers=headers) + if res.status_code == 200: + print(" ✓ Login successful") + else: + print(" ✗ Login failed") + return results + except Exception as e: + print(f" ✗ Login error: {e}") + return results + + current_date = datetime.strptime(start_date, '%Y-%m-%d') + end_dt = datetime.strptime(end_date, 
'%Y-%m-%d') + + while current_date <= end_dt: + date_str = current_date.strftime('%Y-%m-%d') + + # 일별 데이터 엔드포인트 (추정) + daily_url = f"{base_url}/public/chart/getDailyData.php?date={date_str}" + + try: + timestamp = int(time.time() * 1000) + res = session.get(f"{daily_url}&time={timestamp}", headers=headers, timeout=10) + res.encoding = 'euc-kr' + + if res.status_code == 200: + data = res.json() + daily_kwh = safe_float(data.get('daily', data.get('today', 0))) + + results.append({ + 'plant_id': plant_id, + 'date': date_str, + 'generation_kwh': daily_kwh + }) + print(f" ✓ {date_str}: {daily_kwh}kWh") + + except Exception as e: + print(f" ✗ {date_str}: {e}") + + current_date += timedelta(days=1) + + print(f"[Total] Collected {len(results)} daily records\n") + return results + + +def fetch_history_monthly(plant_info, start_month, end_month): + """ + Sun-WMS 발전소의 월별 과거 데이터 수집 + """ + from datetime import datetime + from dateutil.relativedelta import relativedelta + from .base import safe_float + import time + + results = [] + plant_id = plant_info.get('id', 'sunwms-06') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + plant_name = plant_info.get('name', '6호기') + + payload_id = auth.get('payload_id', '') + payload_pw = auth.get('payload_pw', '') + login_url = system.get('login_url', '') + base_url = system.get('base_url', '') + + session = create_session() + + print(f"\n{'='*60}") + print(f"[Sun-WMS Monthly] {plant_name} ({start_month} ~ {end_month})") + print(f"{'='*60}") + + headers = { + 'User-Agent': 'Mozilla/5.0', + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8' + } + + login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw} + + try: + res = session.post(login_url, data=login_data, headers=headers) + if res.status_code == 200: + print(" ✓ Login successful") + else: + print(" ✗ Login failed") + return results + except Exception as e: + print(f" ✗ Login error: {e}") + return results + + 
current_month = datetime.strptime(start_month, '%Y-%m') + end_month_dt = datetime.strptime(end_month, '%Y-%m') + + while current_month <= end_month_dt: + month_str = current_month.strftime('%Y-%m') + + # 월별 데이터 엔드포인트 (추정) + monthly_url = f"{base_url}/public/chart/getMonthlyData.php?month={month_str}" + + try: + timestamp = int(time.time() * 1000) + res = session.get(f"{monthly_url}&time={timestamp}", headers=headers, timeout=10) + res.encoding = 'euc-kr' + + if res.status_code == 200: + data = res.json() + monthly_kwh = safe_float(data.get('monthly', data.get('month', 0))) + + results.append({ + 'plant_id': plant_id, + 'month': month_str, + 'generation_kwh': monthly_kwh + }) + print(f" ✓ {month_str}: {monthly_kwh}kWh") + + except Exception as e: + print(f" ✗ {month_str}: {e}") + + current_month += relativedelta(months=1) + + print(f"[Total] Collected {len(results)} monthly records\n") + return results + + +def fetch_history_hourly(plant_info, start_date, end_date): + """ + Sun-WMS 발전소의 시간대별 과거 데이터 수집 + + Args: + plant_info: dict, 발전소 정보 + start_date: str, 시작일 (YYYY-MM-DD) + end_date: str, 종료일 (YYYY-MM-DD) + + Returns: + list: 시간대별 데이터 레코드 + """ + from datetime import datetime, timedelta + from .base import safe_float + import time + + results = [] + + # 설정 추출 + plant_id = plant_info.get('id', 'sunwms-06') + auth = plant_info.get('auth', {}) + system = plant_info.get('system', {}) + plant_name = plant_info.get('name', '6호기') + + payload_id = auth.get('payload_id', '') + payload_pw = auth.get('payload_pw', '') + login_url = system.get('login_url', '') + base_url = system.get('base_url', '') + + session = create_session() + + print(f"\n{'='*60}") + print(f"[Sun-WMS History] {plant_name} ({start_date} ~ {end_date})") + print(f"{'='*60}") + + # 로그인 + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36', + 'Referer': 'http://tb6.sun-wms.com/public/main/login.php', + 'Content-Type': 'application/x-www-form-urlencoded; 
charset=UTF-8' + } + + login_data = { + 'act': 'loginChk', + 'user_id': payload_id, + 'user_pass': payload_pw + } + + try: + res = session.post(login_url, data=login_data, headers=headers) + if res.status_code != 200: + print(f" ✗ Login failed") + return results + + print(f" ✓ Login successful") + + except Exception as e: + print(f" ✗ Login error: {e}") + return results + + # 날짜 범위 반복 + current_date = datetime.strptime(start_date, '%Y-%m-%d') + end_dt = datetime.strptime(end_date, '%Y-%m-%d') + + while current_date <= end_dt: + date_str = current_date.strftime('%Y-%m-%d') + print(f"\n[Processing Date] {date_str}") + + # 시간대별 데이터 엔드포인트 (추정) + hourly_url = f"{base_url}/public/chart/getHourlyData.php?date={date_str}" + + try: + timestamp = int(time.time() * 1000) + res = session.get(f"{hourly_url}&time={timestamp}", headers=headers, timeout=10) + res.encoding = 'euc-kr' + + if res.status_code == 200: + data = res.json() + hourly_data = data if isinstance(data, list) else data.get('hourly', []) + + if hourly_data and len(hourly_data) > 0: + print(f" ✓ Found {len(hourly_data)} hourly records") + + for item in hourly_data: + hour = str(item.get('hour', item.get('time', '00'))).zfill(2) + generation_kwh = safe_float(item.get('power', item.get('kwh', 0))) + current_kw = safe_float(item.get('kw', 0)) + + timestamp = f"{date_str} {hour}:00:00" + + results.append({ + 'plant_id': plant_id, + 'timestamp': timestamp, + 'generation_kwh': generation_kwh, + 'current_kw': current_kw + }) + else: + print(f" ⚠ No hourly data for {date_str}") + else: + print(f" ✗ HTTP {res.status_code}") + + except Exception as e: + print(f" ✗ Error: {e}") + + # 다음 날짜로 + current_date += timedelta(days=1) + + print(f"\n{'='*60}") + print(f"[Total] Collected {len(results)} hourly records") + print(f"{'='*60}\n") + + return results diff --git a/daily_summary.py b/daily_summary.py new file mode 100644 index 0000000..6c999aa --- /dev/null +++ b/daily_summary.py @@ -0,0 +1,200 @@ +# 
========================================== +# daily_summary.py - 일일 발전 통계 집계 +# ========================================== +# solar_logs 데이터를 집계하여 daily_stats 테이블에 저장 + +from datetime import datetime, timedelta + +try: + from dotenv import load_dotenv + load_dotenv() +except ImportError: + pass + +import pandas as pd +from database import get_supabase_client + + +def get_plant_capacities(client) -> dict: + """plants 테이블에서 용량 정보 조회""" + try: + result = client.table("plants").select("id, capacity").execute() + return {row['id']: row.get('capacity', 99.0) for row in result.data} + except Exception as e: + print(f" ⚠️ 용량 조회 실패: {e}") + return {} + + +def calculate_daily_stats(date_str: str = None): + """ + 특정 날짜의 발전 통계 집계 + + Args: + date_str: 집계 대상 날짜 (YYYY-MM-DD). 미지정 시 오늘. + """ + if date_str is None: + date_str = datetime.now().strftime('%Y-%m-%d') + + print(f"\n📊 [일일 통계 집계] {date_str}") + print("-" * 60) + + client = get_supabase_client() + if not client: + print("❌ Supabase 연결 실패") + return False + + # 1. 용량 정보 조회 + capacities = get_plant_capacities(client) + + # 2. 해당일 로그 조회 + start_dt = f"{date_str}T00:00:00" + end_dt = f"{date_str}T23:59:59" + + try: + result = client.table("solar_logs") \ + .select("plant_id, current_kw, today_kwh, created_at") \ + .gte("created_at", start_dt) \ + .lte("created_at", end_dt) \ + .order("created_at", desc=False) \ + .execute() + + if not result.data: + print(" ⚠️ 해당 날짜의 로그가 없습니다.") + return False + + df = pd.DataFrame(result.data) + + except Exception as e: + print(f" ❌ 로그 조회 실패: {e}") + return False + + # 3. 
발전소별 통계 계산 + stats_list = [] + + for plant_id, group in df.groupby('plant_id'): + # 마지막 로그의 today_kwh + total_generation = group['today_kwh'].iloc[-1] if len(group) > 0 else 0 + + # 최대 출력 + peak_kw = group['current_kw'].max() if len(group) > 0 else 0 + + # 이용률 시간 = 발전량 / 용량 + capacity = capacities.get(plant_id, 99.0) + generation_hours = round(total_generation / capacity, 2) if capacity > 0 else 0 + + stats = { + 'plant_id': plant_id, + 'date': date_str, + 'total_generation': round(total_generation, 2), + 'peak_kw': round(peak_kw, 2), + 'generation_hours': generation_hours + } + stats_list.append(stats) + + # 출력 + print(f" {plant_id}: {total_generation:.1f}kWh ({generation_hours:.1f}시간, 최대 {peak_kw:.1f}kW)") + + # 4. daily_stats 테이블에 Upsert + if stats_list: + try: + result = client.table("daily_stats").upsert( + stats_list, + on_conflict="plant_id,date" + ).execute() + + print("-" * 60) + print(f"✅ {len(stats_list)}개 발전소 통계 저장 완료") + + except Exception as e: + print(f" ❌ 저장 실패: {e}") + return False + + return True + + +def calculate_monthly_stats(target_month: str): + """ + 특정 월의 발전 통계 집계 (일간 데이터 합산) + + Args: + target_month: YYYY-MM + """ + print(f"\n📅 [월간 통계 집계] {target_month}") + print("-" * 60) + + client = get_supabase_client() + if not client: + return False + + try: + # 1. 모든 발전소 ID 조회 + plants_res = client.table("plants").select("id").execute() + plant_ids = [p['id'] for p in plants_res.data] + + updated_count = 0 + + for pid in plant_ids: + # 2. 해당 월의 Daily 합계 조회 + d_res = client.table("daily_stats").select("total_generation") \ + .eq("plant_id", pid) \ + .gte("date", f"{target_month}-01") \ + .lte("date", f"{target_month}-31") \ + .execute() + + if not d_res.data: + continue + + total_gen = sum(r.get('total_generation', 0) or 0 for r in d_res.data) + + # 3. 
Monthly Upsert + client.table("monthly_stats").upsert({ + "plant_id": pid, + "month": target_month, + "total_generation": round(total_gen, 2), + "updated_at": datetime.now().isoformat() + }, on_conflict="plant_id, month").execute() + + print(f" {pid}: {total_gen:.1f}kWh (Month Total)") + updated_count += 1 + + print("-" * 60) + print(f"✅ {updated_count}개 발전소 월간 통계 갱신 완료") + return True + + except Exception as e: + print(f" ❌ 월간 집계 실패: {e}") + return False + + +if __name__ == "__main__": + import sys + from datetime import timedelta + + # 인자로 날짜 지정 가능: python daily_summary.py 2026-01-22 + if len(sys.argv) > 1: + target_date = sys.argv[1] + else: + # 인자 없으면 '어제' 날짜를 기본값으로 사용 + # (새벽에 실행하여 전날 데이터를 마감하는 시나리오) + yesterday = datetime.now() - timedelta(days=1) + target_date = yesterday.strftime('%Y-%m-%d') + print(f"ℹ️ 날짜 미지정 -> 어제({target_date}) 기준으로 집계합니다.") + + # 1. 일간 통계 집계 + success = calculate_daily_stats(target_date) + + # 2. 월말 체크 및 월간 집계 트리거 + # target_date가 해당 월의 마지막 날이면 월간 집계 실행 + if success: + try: + current_dt = datetime.strptime(target_date, '%Y-%m-%d') + import calendar + last_day = calendar.monthrange(current_dt.year, current_dt.month)[1] + + if current_dt.day == last_day: + target_month = current_dt.strftime('%Y-%m') + print(f"\n🔔 월말({target_date}) 감지 -> {target_month} 월간 집계 실행") + calculate_monthly_stats(target_month) + except Exception as e: + print(f"⚠️ 월간 집계 트리거 오류: {e}") + diff --git a/database.py b/database.py new file mode 100644 index 0000000..8cbed22 --- /dev/null +++ b/database.py @@ -0,0 +1,313 @@ +# ========================================== +# database.py - Supabase 연동 +# ========================================== + +import os +from datetime import datetime + +# 환경 변수에서 Supabase 설정 로드 +SUPABASE_URL = os.getenv('SUPABASE_URL', '') +SUPABASE_KEY = os.getenv('SUPABASE_KEY', '') + +print(f"DEBUG: SUPABASE_URL prefix: {SUPABASE_URL[:15] if SUPABASE_URL else 'None'}") + +_supabase_client = None + + +def get_supabase_client(): + """Supabase 클라이언트 
싱글턴 반환""" + global _supabase_client + + if _supabase_client is None: + if not SUPABASE_URL or not SUPABASE_KEY: + print("⚠️ SUPABASE_URL 또는 SUPABASE_KEY가 설정되지 않았습니다.") + print(" .env 파일을 확인하거나 환경 변수를 설정하세요.") + return None + + try: + from supabase import create_client + _supabase_client = create_client(SUPABASE_URL, SUPABASE_KEY) + print("✅ Supabase 연결 성공") + except ImportError: + print("⚠️ supabase 패키지가 설치되지 않았습니다.") + print(" pip install supabase 실행하세요.") + return None + except Exception as e: + print(f"⚠️ Supabase 연결 실패: {e}") + return None + + return _supabase_client + +def save_to_supabase(data_list): + """ + 수집된 발전 데이터를 Supabase solar_logs 테이블에 저장 + + Args: + data_list: [{'id': 'nrems-01', 'name': '...', 'kw': 10.5, 'today': 100.0, 'status': '...'}] + + Returns: + bool: 저장 성공 여부 + """ + if not data_list: + print("[DB] 저장할 데이터가 없습니다.") + return False + + client = get_supabase_client() + if client is None: + print("[DB 저장 생략] Supabase 연결 없음") + return False + + try: + # 저장할 레코드 생성 + records = [] + for item in data_list: + plant_id = item.get('id', '') + + # id가 없는 경우 건너뛰기 + if not plant_id: + print(f" ⚠️ '{item.get('name', 'Unknown')}' ID 없음, 건너뜀") + continue + + # 한국 시간(KST) 타임스탬프 생성 + from datetime import timezone, timedelta + kst = timezone(timedelta(hours=9)) + kst_now = datetime.now(kst).isoformat() + + record = { + 'plant_id': plant_id, + 'current_kw': float(item.get('kw', 0)), + 'today_kwh': float(item.get('today', 0)), + 'status': item.get('status', ''), + 'created_at': kst_now # 한국 시간으로 저장 + } + records.append(record) + + if not records: + print("[DB] 저장할 유효한 레코드가 없습니다.") + return False + + # Supabase에 일괄 삽입 (solar_logs) + result = client.table("solar_logs").insert(records).execute() + + print(f"✅ [DB] Supabase 저장 완료: {len(records)}건 (solar_logs)") + + # daily_stats 테이블 업데이트 (Upsert) + # 오늘 날짜(KST) 기준, 현재 수집된 today_kwh가 기존 값보다 크거나 같으면 업데이트 + # 하지만 보통 today_kwh는 누적값이므로 간단하게 upsert 처리 + daily_records = [] + kst_date_str = 
datetime.now(timezone(timedelta(hours=9))).strftime("%Y-%m-%d") + + for item in data_list: + plant_id = item.get('id', '') + if not plant_id: continue + + today_val = float(item.get('today', 0)) + + # 0인 경우는 저장하지 않거나(새벽), 기존 값을 덮어쓰지 않도록 주의해야 함 + # 하지만 발전소 데이터 보정을 위해 0이어도 일단 기록하거나, + # 아니면 max 값을 유지하는 로직이 필요할 수 있음. + # 여기서는 Upsert로 덮어쓰되, DB 트리거가 없다면 마지막 값이 저장됨. + # 보통 크롤링은 누적값이므로 마지막 값이 그날의 최종값에 가까움. + + daily_records.append({ + "plant_id": plant_id, + "date": kst_date_str, + "total_generation": today_val, + "created_at": kst_now, # 생성/수정일 + "updated_at": kst_now + }) + + if daily_records: + # upsert: plant_id, date가 unique constraint여야 함 + try: + # ignore_duplicates=False -> 업데이트 + # on_conflict="plant_id, date" (Supabase/PG 설정에 따라 다름, 보통 PK나 UK 기준) + stats_result = client.table("daily_stats").upsert(daily_records, on_conflict="plant_id, date").execute() + print(f"✅ [DB] daily_stats 업데이트 완료: {len(daily_records)}건") + except Exception as e: + print(f"⚠️ [DB] daily_stats 업데이트 실패: {e}") + + for r in records: + print(f" → {r['plant_id']}: {r['current_kw']} kW / {r['today_kwh']} kWh") + + return True + + except Exception as e: + print(f"❌ [DB] Supabase 저장 실패: {e}") + return False + +def save_to_console(data_list): + """콘솔에 데이터 출력""" + if not data_list: + print("⚠️ 출력할 데이터가 없습니다.") + return + + print("\n" + "=" * 75) + print("📊 [실시간 통합 현황판]") + print("=" * 75) + print(f"{'발전소명':<20} | {'현재출력(kW)':>12} | {'금일발전(kWh)':>12} | {'상태'}") + print("-" * 75) + + total_kw = 0 + total_today = 0 + + for d in data_list: + name = d.get('name', 'N/A') + kw = d.get('kw', 0) + today = d.get('today', 0) + status = d.get('status', '') + + total_kw += kw + total_today += today + + print(f"{name:<20} | {kw:>12.2f} | {today:>12.2f} | {status}") + + print("-" * 75) + print(f"{'합계':<20} | {total_kw:>12.2f} | {total_today:>12.2f} |") + print("=" * 75) + +def save_history(data_list, data_type='hourly'): + """ + 과거 데이터 저장 (Hourly, Daily, Monthly) + + Args: + data_list: 데이터 리스트 + data_type: 
'hourly', 'daily', 'monthly' + """ + if not data_list: + return False + + client = get_supabase_client() + if client is None: + return False + + try: + table_name = "" + records = [] + + if data_type == 'hourly': + table_name = "solar_logs" + for item in data_list: + # hourly 데이터는 timestamp 키를 가짐 + ts = item.get('timestamp') + if ts: + ts_iso = ts.replace(' ', 'T') + # Check if future (simple string comparison works for ISO format if consistent, but datetime is safer) + # KST aware comparison + from datetime import timezone, timedelta + kst = timezone(timedelta(hours=9)) + now_kst = datetime.now(kst) + + try: + # ts example: 2026-01-27 14:00:00. Assume input is local time (KST) + # We convert it to aware datetime + dt_ts = datetime.fromisoformat(ts_iso) + if dt_ts.tzinfo is None: + dt_ts = dt_ts.replace(tzinfo=kst) + + if dt_ts > now_kst: + continue # Skip future data + except ValueError: + pass # robust date parsing needed if format varies + + # Ensure timezone is sent to Supabase to prevent UTC assumption + final_created_at = dt_ts.isoformat() + + records.append({ + 'plant_id': item['plant_id'], + 'created_at': final_created_at, + 'current_kw': float(item.get('current_kw', 0) or item.get('generation_kwh', 0)), + 'today_kwh': float(item.get('generation_kwh', 0)), + 'status': 'History' + }) + + elif data_type == 'daily': + table_name = "daily_stats" + for item in data_list: + records.append({ + 'plant_id': item['plant_id'], + 'date': item['date'], + 'total_generation': float(item.get('generation_kwh', 0)) + # 'updated_at': datetime.now().isoformat() + }) + + elif data_type == 'monthly': + table_name = "monthly_stats" + for item in data_list: + records.append({ + 'plant_id': item['plant_id'], + 'month': item['month'], # YYYY-MM + 'total_generation': float(item.get('generation_kwh', 0)), + 'updated_at': datetime.now().isoformat() + }) + + if not records: + return False + + # upsert 사용 + if data_type == 'hourly': + # hourly는 시간값 중복 시 업데이트? 
solar_logs는 보통 log table이라 pk가 id일 수 있음. + # 하지만 과거 내역이므로 중복 방지가 필요. created_at 기준? + # solar_logs에 unique constraints가 plant_id, created_at에 있는지 불확실. + # 일단 insert로 시도 + client.table(table_name).insert(records).execute() + elif data_type == 'daily': + client.table(table_name).upsert(records, on_conflict="plant_id, date").execute() + + # [Auto Update] Daily 데이터 저장 시 Monthly 통계 자동 갱신 + # 1. 업데이트된 월 목록 추출 + updated_months = set() + for rec in records: + try: + # date: YYYY-MM-DD + month_key = rec['date'][:7] + updated_months.add((rec['plant_id'], month_key)) + except: + pass + + if updated_months: + monthly_upserts = [] + for (pid, m_key) in updated_months: + # 2. 해당 월의 Daily 합계 조회 (DB Aggregation) + # start_date ~ end_date 범위 쿼리가 필요하지만, + # supabase-py에서는 .select('total_generation.sum()') 같은 게 잘 안됨. + # 그냥 해당 월 데이터를 가져와서 파이썬에서 합산 (데이터 최대 31개라 매우 가벼움) + + start_d = f"{m_key}-01" + # end_d 로직 복잡하므로 그냥 문자열 필터로 (YYYY-MM-01 ~ YYYY-MM-31) + # like는 지원 안 할 수 있으므로 date >= start AND date <= end + # 다음달 1일 전까지 + + # 쿼리: select total_generation where plant_id=X and date like 'YYYY-MM%' + # but 'like' operator might differ. + # Simpler: gte "YYYY-MM-01", lte "YYYY-MM-31" + + d_res = client.table("daily_stats").select("total_generation") \ + .eq("plant_id", pid) \ + .gte("date", f"{m_key}-01") \ + .lte("date", f"{m_key}-31") \ + .execute() + + total_gen = sum(r['total_generation'] or 0 for r in d_res.data) + + monthly_upserts.append({ + "plant_id": pid, + "month": m_key, + "total_generation": round(total_gen, 2), + "updated_at": datetime.now().isoformat() + }) + + # 3. 
Monthly Upsert + if monthly_upserts: + client.table("monthly_stats").upsert(monthly_upserts, on_conflict="plant_id, month").execute() + print(f" 🔄 [Sync] {len(monthly_upserts)}개월치 Monthly Stats 자동 갱신 완료") + + elif data_type == 'monthly': + client.table(table_name).upsert(records, on_conflict="plant_id, month").execute() + + print(f"✅ [History] {data_type} 데이터 {len(records)}건 저장 완료") + return True + + except Exception as e: + print(f"❌ [History] 저장 실패 ({data_type}): {e}") + return False diff --git a/fetch_history.py b/fetch_history.py new file mode 100644 index 0000000..fd820fc --- /dev/null +++ b/fetch_history.py @@ -0,0 +1,138 @@ + +import sys +import os +import importlib +from datetime import datetime, timedelta +from dateutil.relativedelta import relativedelta +from dotenv import load_dotenv + +# .env 로드 +load_dotenv() + +# Windows 인코딩 문제 해결 +if sys.platform.startswith('win'): + sys.stdout.reconfigure(encoding='utf-8') + sys.stderr.reconfigure(encoding='utf-8') + +# 프로젝트 루트 경로 추가 +current_dir = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(current_dir) + +from config import get_all_plants +from database import save_history + +def get_plant_config(target_id): + plants = get_all_plants() + for p in plants: + # 일반 매칭 + if p.get('id') == target_id: + return p + + # NREMS 분리 세대 매칭 (nrems-01, nrems-02) + if p.get('options', {}).get('is_split'): + if target_id == 'nrems-01': + p['id'] = 'nrems-01' + p['options']['split_index'] = 1 + return p + elif target_id == 'nrems-02': + p['id'] = 'nrems-02' + p['options']['split_index'] = 2 + return p + return None + +def fetch_and_save(plant_config): + plant_id = plant_config['id'] + plant_type = plant_config['type'] + plant_name = plant_config['name'] + start_date_str = plant_config.get('start_date', '2020-01-01') + + print(f"🚀 [{plant_name}] 과거 데이터 수집 시작 ({plant_id})") + print(f" 타입: {plant_type}, 가동개시일: {start_date_str}") + + # 크롤러 모듈 동적 임포트 + try: + crawler_module = 
importlib.import_module(f"crawlers.{plant_type}") + except ImportError: + print(f"❌ 크롤러 모듈을 찾을 수 없습니다: crawlers/{plant_type}.py") + return + + now = datetime.now() + today_str = now.strftime("%Y-%m-%d") + current_year = now.year + current_month = now.month + + # 1. 시간별 데이터 (Hourly): 이번 달 1일 ~ 오늘 + # (역순으로 가져오라고 했지만, 크롤러는 start->end로 동작하므로 범위로 호출) + try: + h_start = now.replace(day=1).strftime("%Y-%m-%d") + h_end = today_str + print(f"\n⏳ [Hourly] 수집 : {h_start} ~ {h_end}") + + if hasattr(crawler_module, 'fetch_history_hourly'): + hourly_data = crawler_module.fetch_history_hourly(plant_config, h_start, h_end) + if hourly_data: + save_history(hourly_data, 'hourly') + else: + print(" 데이터 없음") + else: + print(f" {plant_type}는 시간별 이력 수집을 지원하지 않음") + + except Exception as e: + print(f"❌ [Hourly] 에러: {e}") + + # 2. 일별 데이터 (Daily): 발전소 가동일 ~ 오늘 + # API 서버가 daily_stats를 집계하여 월/년 통계를 보여주므로, daily 데이터를 전체 기간 수집해야 함. + try: + # d_start = f"{current_year}-01-01" + d_start = start_date_str # 가동 시작일부터 수집 + d_end = today_str + print(f"\n⏳ [Daily] 수집 : {d_start} ~ {d_end}") + + if hasattr(crawler_module, 'fetch_history_daily'): + daily_data = crawler_module.fetch_history_daily(plant_config, d_start, d_end) + if daily_data: + save_history(daily_data, 'daily') + else: + print(" 데이터 없음") + else: + print(f" {plant_type}는 일별 이력 수집을 지원하지 않음") + + except Exception as e: + print(f"❌ [Daily] 에러: {e}") + + # 3. 
월별 데이터 (Monthly): 사용 안함 (API가 daily_stats 집계 사용) + # try: + # m_start_dt = datetime.strptime(start_date_str, "%Y-%m-%d") + # m_start = m_start_dt.strftime("%Y-%m") + # m_end = now.strftime("%Y-%m") + # print(f"\n⏳ [Monthly] 수집 : {m_start} ~ {m_end}") + # + # if hasattr(crawler_module, 'fetch_history_monthly'): + # monthly_data = crawler_module.fetch_history_monthly(plant_config, m_start, m_end) + # if monthly_data: + # save_history(monthly_data, 'monthly') + # else: + # print(" 데이터 없음") + # else: + # print(f" {plant_type}는 월별 이력 수집을 지원하지 않음") + # + # except Exception as e: + # print(f"❌ [Monthly] 에러: {e}") + + except Exception as e: + print(f"❌ [Monthly] 에러: {e}") + + print(f"\n✅ [{plant_name}] 모든 작업 완료") + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("Usage: python fetch_history.py ") + sys.exit(1) + + target_plant_id = sys.argv[1] + cfg = get_plant_config(target_plant_id) + + if cfg: + fetch_and_save(cfg) + else: + print(f"❌ 설정을 찾을 수 없습니다: {target_plant_id}") diff --git a/main.py b/main.py new file mode 100644 index 0000000..228b6a3 --- /dev/null +++ b/main.py @@ -0,0 +1,158 @@ +# ========================================== +# main.py - 태양광 발전 통합 관제 시스템 +# ========================================== + +import re +from datetime import datetime + +# 환경 변수 로드 (최상단에서 실행) +try: + from dotenv import load_dotenv + load_dotenv() + print("✅ 환경 변수 로드 완료") +except ImportError: + print("⚠️ python-dotenv가 설치되지 않았습니다. 
환경 변수를 직접 설정하세요.") + +from config import get_all_plants +from database import save_to_supabase, save_to_console +from crawlers import get_crawler +from crawler_manager import CrawlerManager + +# 스마트 스케줄러 초기화 +crawler_manager = CrawlerManager() + +def extract_unit_number(name): + """발전소 이름에서 호기 번호 추출 (정렬용)""" + match = re.search(r'(\d+)호기', name) + if match: + return int(match.group(1)) + return 999 + +def integrated_monitoring(save_to_db=True, company_filter=None, force_run=False): + """ + 통합 모니터링 실행 + + Args: + save_to_db: True면 Supabase에 저장 + company_filter: 특정 업체만 필터링 (예: 'sunwind') + force_run: True면 스케줄러 무시하고 강제 실행 + """ + now_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + print(f"\n🚀 [통합 관제 시스템] 데이터 수집 시작... ({now_str})") + print("-" * 75) + + # 평탄화된 발전소 목록 가져오기 + all_plants = get_all_plants() + + # 업체 필터링 (옵션) + if company_filter: + all_plants = [p for p in all_plants if p['company_id'] == company_filter] + print(f"📌 필터 적용: {company_filter}") + + total_results = [] + skipped_count = 0 + + for plant in all_plants: + plant_type = plant['type'] + plant_name = plant.get('display_name', plant.get('name', 'Unknown')) + company_id = plant.get('company_id', '') + company_name = plant.get('company_name', '') + + # 크롤링 결과에서 생성되는 site_id 목록 (1,2호기 분리 처리 고려) + is_split = plant.get('options', {}).get('is_split', False) + if is_split: + site_ids = ['nrems-01', 'nrems-02'] + else: + site_ids = [plant.get('id', '')] + + # 스마트 스케줄러 확인 (force_run이 아닌 경우) + if not force_run: + # 모든 site_id에 대해 should_run 확인 (하나라도 실행해야 하면 실행) + should_run_any = False + for site_id in site_ids: + if site_id: + crawler_manager.register_site(site_id) + if crawler_manager.should_run(site_id): + should_run_any = True + break + + if not should_run_any: + print(f" ⏭️ [{plant_type.upper()}] {plant_name} 스킵 (스케줄 외)") + skipped_count += 1 + continue + + print(f"📡 [{plant_type.upper()}] {company_name} - {plant_name} 수집 중...") + + try: + crawler_func = get_crawler(plant_type) + if crawler_func: + 
data = crawler_func(plant) + if data: + # company_id, company_name 주입 + for item in data: + item['company_id'] = company_id + item['company_name'] = company_name + + # 크롤링 성공 시 실행 기록 + item_id = item.get('id', '') + if item_id: + crawler_manager.record_run(item_id) + + total_results.extend(data) + else: + print(f" ⚠️ 알 수 없는 크롤러 타입: {plant_type}") + except Exception as e: + print(f" ❌ {plant_name} 실패: {e}") + + # 정렬 (호기 번호 순) + total_results.sort(key=lambda x: extract_unit_number(x['name'])) + + # 중복 제거 (company_id + id 조합) + seen_keys = set() + unique_results = [] + for item in total_results: + unique_key = f"{item.get('company_id', '')}_{item.get('id', '')}" + if unique_key not in seen_keys: + seen_keys.add(unique_key) + unique_results.append(item) + total_results = unique_results + + print("-" * 75) + + if skipped_count > 0: + print(f"📊 스킵된 사이트: {skipped_count}개 (스케줄 외)") + + if total_results: + # 콘솔 출력 + save_to_console(total_results) + + # DB 저장 + if save_to_db: + save_to_supabase(total_results) + + # 이상 감지 로직 + current_hour = datetime.now().hour + if 10 <= current_hour <= 17: + issues = [d['name'] for d in total_results if d.get('kw', 0) == 0] + if issues: + print("\n🚨 [이상 감지 리포트]") + for name in issues: + print(f" ⚠️ 경고: '{name}' 발전량이 0입니다! 
확인 필요.") + else: + print("\n ✅ 현재 모든 발전소가 정상 가동 중입니다.") + else: + print("❌ 수집된 데이터가 없습니다.") + + return total_results + +if __name__ == "__main__": + import sys + + # 인자 처리: --force 옵션으로 스케줄러 무시 + force_run = '--force' in sys.argv or '-f' in sys.argv + + if force_run: + print("⚡ [강제 실행 모드] 스케줄러 무시하고 모든 사이트 크롤링") + + integrated_monitoring(save_to_db=True, force_run=force_run) + diff --git a/sync_plants.py b/sync_plants.py new file mode 100644 index 0000000..ee48436 --- /dev/null +++ b/sync_plants.py @@ -0,0 +1,91 @@ +# ========================================== +# sync_plants.py - 발전소 정보 동기화 +# ========================================== +# config.py의 발전소 정보를 Supabase plants 테이블에 Upsert + +from datetime import datetime + +try: + from dotenv import load_dotenv + load_dotenv() +except ImportError: + pass + +from config import get_all_plants +from database import get_supabase_client + + +def sync_plants(): + """ + 로컬 config.py의 발전소 정보를 Supabase plants 테이블에 동기화 + """ + print(f"\n🔄 [발전소 동기화] 시작... 
({datetime.now().strftime('%Y-%m-%d %H:%M:%S')})") + print("-" * 60) + + client = get_supabase_client() + if not client: + print("❌ Supabase 연결 실패") + return False + + plants = get_all_plants() + + # 중복 제거 (is_split인 1,2호기는 별도 처리) + unique_plants = {} + for plant in plants: + plant_id = plant.get('id', '') + is_split = plant.get('options', {}).get('is_split', False) + + if is_split: + # 1, 2호기 분리 (용량 N빵) + total_capacity = plant.get('capacity_kw', 100.0) + unit_capacity = total_capacity / 2 + start_date = plant.get('start_date', '') + + unique_plants['nrems-01'] = { + 'id': 'nrems-01', + 'name': f"{plant.get('company_name', '')} 1호기", + 'type': plant.get('type', ''), + 'capacity': unit_capacity, + 'constructed_at': start_date, + 'company_id': 1 + } + unique_plants['nrems-02'] = { + 'id': 'nrems-02', + 'name': f"{plant.get('company_name', '')} 2호기", + 'type': plant.get('type', ''), + 'capacity': unit_capacity, + 'constructed_at': start_date, + 'company_id': 1 + } + elif plant_id: + unique_plants[plant_id] = { + 'id': plant_id, + 'name': f"{plant.get('company_name', '')} {plant.get('name', '')}", + 'type': plant.get('type', ''), + 'capacity': plant.get('capacity_kw', 0.0), + 'constructed_at': plant.get('start_date', ''), + 'company_id': 1 + } + + success_count = 0 + for plant_id, plant_data in unique_plants.items(): + try: + result = client.table("plants").upsert( + plant_data, + on_conflict="id" + ).execute() + + print(f" ✅ {plant_data['name']} (용량: {plant_data['capacity']} kW)") + success_count += 1 + + except Exception as e: + print(f" ❌ {plant_id} 실패: {e}") + + print("-" * 60) + print(f"✅ 동기화 완료: {success_count}/{len(unique_plants)}개") + + return True + + +if __name__ == "__main__": + sync_plants() diff --git a/tests/check_missing_dates.py b/tests/check_missing_dates.py new file mode 100644 index 0000000..833df1a --- /dev/null +++ b/tests/check_missing_dates.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +1월 28, 29일 데이터 확인 스크립트 +""" + 
+from datetime import datetime +import sys +import os + +print("Starting checks...", flush=True) + +# Add parent directory to path to import modules +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.dirname(current_dir) +sys.path.append(parent_dir) + +print(f"Current dir: {current_dir}", flush=True) +print(f"Parent dir: {parent_dir}", flush=True) +print(f"Sys path: {sys.path}", flush=True) + +try: + from crawlers import nrems, hyundai, kremc, sun_wms, cmsolar + from config import SYSTEM_CONSTANTS + print("Imports successful", flush=True) +except Exception as e: + print(f"Import failed: {e}", flush=True) + import traceback + traceback.print_exc() + sys.exit(1) + +def check_dates(plant_config, crawler_module, start_date, end_date): + plant_name = plant_config['name'] + print(f"\n[{plant_name}] 데이터 확인: {start_date} ~ {end_date}") + + try: + # Check daily data + daily_data = crawler_module.fetch_history_daily(plant_config, start_date, end_date) + + if not daily_data: + print(" ❌ 데이터 없음") + return + + print(f" 총 {len(daily_data)}일 데이터 수신") + for record in daily_data: + print(f" - 날짜: {record.get('date', 'Unknown')}, 발전량: {record.get('generation_kwh', 0)} kWh") + + except Exception as e: + print(f" ❌ 오류 발생: {str(e)}") + # import traceback + # traceback.print_exc() + +def main(): + print(">>> 1월 28, 29일 데이터 확인 <<<") + + # Dates to check + start_date = '2026-01-28' + end_date = '2026-01-29' + + test_plants = [ + # NREMS 1,2호기 (분리) + ({'id': 'nrems-01', 'name': '1호기', 'type': 'nrems', + 'auth': {'pscode': 'duce2023072288'}, + 'options': {'is_split': True, 'unit_id': 1}, + 'system': SYSTEM_CONSTANTS['nrems']}, nrems), + + ({'id': 'nrems-02', 'name': '2호기', 'type': 'nrems', + 'auth': {'pscode': 'duce2023072288'}, + 'options': {'is_split': True, 'unit_id': 2}, + 'system': SYSTEM_CONSTANTS['nrems']}, nrems), + + # NREMS 3호기 + ({'id': 'nrems-03', 'name': '3호기', 'type': 'nrems', + 'auth': {'pscode': 'dc2023121086'}, + 'options': {}, + 'system': 
SYSTEM_CONSTANTS['nrems']}, nrems), + + # NREMS 4호기 + ({'id': 'nrems-04', 'name': '4호기', 'type': 'nrems', + 'auth': {'pscode': 'duce2023072269'}, + 'options': {}, + 'system': SYSTEM_CONSTANTS['nrems']}, nrems), + + # NREMS 9호기 + ({'id': 'nrems-09', 'name': '9호기', 'type': 'nrems', + 'auth': {'pscode': 'a2020061008'}, + 'options': {}, + 'system': SYSTEM_CONSTANTS['nrems']}, nrems), + + # KREMC 5호기 + ({'id': 'kremc-05', 'name': '5호기', 'type': 'kremc', + 'auth': {'user_id': '서대문도서관', 'password': 'sunhope5!'}, + 'options': {'cid': '10013000376', 'cityProvCode': '11', 'rgnCode': '11410', + 'dongCode': '1141011700', 'enso_type_code': '15001'}, + 'system': SYSTEM_CONSTANTS['kremc']}, kremc), + + # Sun-WMS 6호기 + ({'id': 'sunwms-06', 'name': '6호기', 'type': 'sun_wms', + 'auth': {'payload_id': 'kc0fXUW0LUm2wZa+2NQI0Q==', 'payload_pw': 'PGXjU6ib2mKYwtrh2i3fIQ=='}, + 'options': {}, + 'system': SYSTEM_CONSTANTS['sun_wms']}, sun_wms), + + # Hyundai 8호기 + ({'id': 'hyundai-08', 'name': '8호기', 'type': 'hyundai', + 'auth': {'user_id': 'epecoop', 'password': 'sunhope0419', 'site_id': 'M0494'}, + 'options': {}, + 'system': SYSTEM_CONSTANTS['hyundai']}, hyundai), + + # CMSolar 10호기 (Fix login info from verify_data.py if valid, otherwise use config.py's) + # Using config.py's info but updated with values seen in verify_data.py which seemed to be used for testing + # verify_data.py had: 'login_id': 'smart3131', 'password': 'ehdrb!123' + # config.py has: 'login_id': 'sy7144', 'login_pw': 'sy7144' + # I should probably use what is in config.py OR verify_data.py. Let's try config.py first as it is the source of truth usually, + # BUT wait, verify_data.py was likely used recently. + # Let's check config.py again. Config.py has 'sy7144'. verify_data.py has 'smart3131'. + # The user history mentioned "Debugging Real-time Crawlers" and "CMSolar". + # Let's check `crawler/crawlers/cmsolar.py` to see what it expects or if there are hardcoded overrides. 
+ ({'id': 'cmsolar-10', 'name': '10호기', 'type': 'cmsolar', + 'auth': {'login_id': 'sy7144', 'login_pw': 'sy7144', 'site_no': '834'}, + 'options': {}, + 'system': SYSTEM_CONSTANTS['cmsolar']}, cmsolar), + ] + + for plant_config, crawler_module in test_plants: + check_dates(plant_config, crawler_module, start_date, end_date) + +if __name__ == '__main__': + main() diff --git a/tests/check_today_10.py b/tests/check_today_10.py new file mode 100644 index 0000000..5d317fb --- /dev/null +++ b/tests/check_today_10.py @@ -0,0 +1,26 @@ + +import sys +import os +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from dotenv import load_dotenv +load_dotenv() +from database import get_supabase_client +from datetime import datetime, timezone, timedelta + +def check_today(): + c = get_supabase_client() + # Today in KST + kst = timezone(timedelta(hours=9)) + now = datetime.now(kst) + today_start = now.replace(hour=0, minute=0, second=0, microsecond=0) + print(f"Checking data since {today_start.isoformat()} (KST)") + + res = c.table('solar_logs').select('created_at, current_kw, today_kwh, status').eq('plant_id', 'cmsolar-10').gte('created_at', today_start.isoformat()).order('created_at', desc=True).execute() + + print(f"Found {len(res.data)} records for today:") + for item in res.data: + print(f"{item['created_at']} | {item.get('current_kw')} kW") + +if __name__ == "__main__": + check_today() diff --git a/tests/debug_cmsolar.py b/tests/debug_cmsolar.py new file mode 100644 index 0000000..4e7d81f --- /dev/null +++ b/tests/debug_cmsolar.py @@ -0,0 +1,51 @@ + +import requests +from config import get_all_plants +from crawlers.cmsolar import fetch_data +from crawlers.base import create_session + +def debug_cmsolar(): + plants = get_all_plants() + target = next((p for p in plants if p['id'] == 'cmsolar-10'), None) + + if not target: + print("Plant 10 not found") + return + + print(f"Debug target: {target['name']}") + + # Manually reproduce fetch_data logic to 
see raw response + auth = target.get('auth', {}) + system = target.get('system', {}) + + login_id = auth.get('login_id', '') # config.py uses login_id? checking cmsolar.py it uses payload_id or auth get directly. + # config.py for cmsolar-10: + # 'auth': { 'login_id': 'sy7144', 'login_pw': 'sy7144', 'site_no': '834' } + + # cmsolar.py fetch_data: + # login_id = auth.get('payload_id', '') -> THIS MIGHT BE WRONG if config keys are login_id + + # Check config.py again for cmsolar-10 auth keys. + # Lines 154-158 in config.py: + # 'auth': { 'login_id': 'sy7144', 'login_pw': 'sy7144', 'site_no': '834' } + + # cmsolar.py Lines 20-22: + # login_id = auth.get('payload_id', '') + # login_pw = auth.get('payload_pw', '') + # site_no = auth.get('site_no', '') + + # WAIT! 'payload_id' vs 'login_id'. + # If the code expects 'payload_id' but config provides 'login_id', then login_id will be empty string. + # This might be the bug. + + print(f"Auth keys in config: {list(auth.keys())}") + + # Let's try to run fetch_data and catch exception + try: + result = fetch_data(target) + print(f"Result: {result}") + except Exception as e: + print(f"Exception: {e}") + +if __name__ == "__main__": + debug_cmsolar() diff --git a/tests/debug_cmsolar_realtime.py b/tests/debug_cmsolar_realtime.py new file mode 100644 index 0000000..d9cbc1d --- /dev/null +++ b/tests/debug_cmsolar_realtime.py @@ -0,0 +1,85 @@ + +import sys +import os +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from dotenv import load_dotenv +load_dotenv() + +from config import get_all_plants +from crawlers.base import create_session + +def debug_cmsolar_realtime(): + plants = get_all_plants() + target = next((p for p in plants if p['id'] == 'cmsolar-10'), None) + + if not target: + print("Plant 10 not found") + return + + print(f"Debug target: {target['name']}") + + # Extract info + auth = target.get('auth', {}) + system = target.get('system', {}) + + login_id = auth.get('login_id', '') + login_pw 
= auth.get('login_pw', '') + site_no = auth.get('site_no', '') + login_url = system.get('login_url', '') + data_url = system.get('data_url', '') + + print(f"Login ID: {login_id}") + print(f"Login URL: {login_url}") + print(f"Data URL: {data_url}") + + session = create_session() + + headers = { + 'User-Agent': 'Mozilla/5.0', + 'Content-Type': 'application/x-www-form-urlencoded' + } + + # Login + login_data = { + 'login_id': login_id, + 'login_pw': login_pw, + 'site_no': site_no + } + + print("Logging in...") + try: + res = session.post(login_url, data=login_data, headers=headers) + print(f"Login Status: {res.status_code}") + + # Site selection + base_url = "http://www.cmsolar2.kr" + change_url = f"{base_url}/change.php?site={site_no}" + print(f"Selecting site via {change_url}...") + session.get(change_url, headers=headers) + + except Exception as e: + print(f"Login/Select Error: {e}") + return + + # Fetch Data + real_data_url = f"{base_url}/plant/sub/idx_ok.php?mode=getPlant" + print(f"Fetching data from {real_data_url}...") + + try: + res = session.get(real_data_url, headers=headers) + print(f"Data Status: {res.status_code}") + # print(f"Data Content-Type: {res.headers.get('Content-Type')}") + print(f"Data Response:\n{res.text}") + + try: + json_data = res.json() + print(f"JSON parsed successfully.") + except Exception as e: + print(f"JSON Parse Error: {e}") + + except Exception as e: + print(f"Data Fetch Error: {e}") + +if __name__ == "__main__": + debug_cmsolar_realtime() diff --git a/tests/debug_db_check.py b/tests/debug_db_check.py new file mode 100644 index 0000000..de0309f --- /dev/null +++ b/tests/debug_db_check.py @@ -0,0 +1,14 @@ + +from dotenv import load_dotenv +load_dotenv() +from database import get_supabase_client + +def check_db(): + c = get_supabase_client() + res = c.table('solar_logs').select('created_at, current_kw, today_kwh').eq('plant_id', 'cmsolar-10').order('created_at', desc=True).limit(30).execute() + print("Recent logs for cmsolar-10:") + 
for item in res.data: + print(f"{item['created_at']} | {item.get('current_kw', 'N/A')} kW | {item.get('today_kwh', 'N/A')} kWh") + +if __name__ == "__main__": + check_db() diff --git a/tests/debug_kremc.py b/tests/debug_kremc.py new file mode 100644 index 0000000..614d40b --- /dev/null +++ b/tests/debug_kremc.py @@ -0,0 +1,43 @@ + +import requests +from dotenv import load_dotenv +load_dotenv() +from config import get_all_plants +from crawlers.kremc import fetch_data +from crawlers.base import create_session + +def debug_kremc(): + plants = get_all_plants() + # 5호기 (kremc) 찾기 - id가 kremc-05인 것 + target = next((p for p in plants if p['id'] == 'kremc-05'), None) + + if not target: + print("Plant kremc-05 not found") + return + + print(f"Debug target: {target['name']}") + + print(f"Debug target: {target['name']}") + + from datetime import datetime + today = datetime.now().strftime('%Y-%m-%d') + print(f"Fetching hourly history for {today}...") + + from crawlers.kremc import fetch_history_hourly + from database import save_history + try: + results = fetch_history_hourly(target, today, today) + print(f"Hourly Results ({len(results)}):") + for r in results: + print(f" {r['timestamp']}: {r['generation_kwh']} kWh") + + if results: + print("Saving to DB...") + save_history(results, 'hourly') + print("Done.") + + except Exception as e: + print(f"Exception: {e}") + +if __name__ == "__main__": + debug_kremc() diff --git a/tests/debug_kremc_realtime.py b/tests/debug_kremc_realtime.py new file mode 100644 index 0000000..81d001e --- /dev/null +++ b/tests/debug_kremc_realtime.py @@ -0,0 +1,30 @@ + +import sys +import os +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from dotenv import load_dotenv +load_dotenv() + +from config import get_all_plants +from crawlers.kremc import fetch_data + +def debug_kremc_realtime(): + plants = get_all_plants() + target = next((p for p in plants if p['id'] == 'kremc-05'), None) + + if not target: + print("Plant 5 not 
found") + return + + print(f"Debug target: {target['name']}") + + try: + print("Fetching data...") + results = fetch_data(target) + print(f"Results: {results}") + except Exception as e: + print(f"Error: {e}") + +if __name__ == "__main__": + debug_kremc_realtime() diff --git a/tests/fill_today_data.py b/tests/fill_today_data.py new file mode 100644 index 0000000..a791bac --- /dev/null +++ b/tests/fill_today_data.py @@ -0,0 +1,53 @@ + +import sys +import os +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from dotenv import load_dotenv +load_dotenv() + +from datetime import datetime +from database import get_supabase_client, save_history +from config import get_all_plants +from crawlers.kremc import fetch_history_hourly as fetch_kremc +from crawlers.cmsolar import fetch_history_hourly as fetch_cmsolar + +def cleanup_history(plant_id, today_str): + client = get_supabase_client() + # Delete 'History' status records for today to avoid duplicates/bad data + # Filter by created_at >= today's start and status='History' + + # Simple approach: delete records with status='History' created today + # KST date string is tricky for created_at (UTC), but status='History' is unique to our manual script + try: + res = client.table('solar_logs').delete().eq('plant_id', plant_id).eq('status', 'History').execute() + print(f"[{plant_id}] Cleaned up {len(res.data)} old history records.") + except Exception as e: + print(f"[{plant_id}] Cleanup failed (might be empty): {e}") + +def fill_today_data(): + plants = get_all_plants() + kremc_plant = next((p for p in plants if p['id'] == 'kremc-05'), None) + cmsolar_plant = next((p for p in plants if p['id'] == 'cmsolar-10'), None) + + today = "2026-01-29" + print(f"Filling data for {today}...") + + # 1. KREMC (5호기) - Skip as it's done + # if kremc_plant: ... + + # 2. 
CMSolar (10호기) + if cmsolar_plant: + print("\n--- Processing CMSolar (10호기) ---") + cleanup_history('cmsolar-10', today) + try: + results = fetch_cmsolar(cmsolar_plant, today, today) + print(f"Fetched results: {results}") + if results: + save_history(results, 'hourly') + print("Saved CMSolar data.") + except Exception as e: + print(f"CMSolar Error: {e}") + +if __name__ == "__main__": + fill_today_data() diff --git a/verify_data.py b/verify_data.py new file mode 100644 index 0000000..dfa6a90 --- /dev/null +++ b/verify_data.py @@ -0,0 +1,249 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +데이터 검증 스크립트 +각 발전소별로 특정 날짜/월/연도의 실제 데이터를 조회하여 검증 +""" + +from datetime import datetime +from crawlers import nrems, hyundai, kremc, sun_wms, cmsolar +from config import SYSTEM_CONSTANTS + + +def format_hourly_data(data, date_str, plant_name): + """시간별 데이터 포맷팅""" + print(f"\n{'='*80}") + print(f"[{plant_name}] 시간별 데이터: {date_str}") + print(f"{'='*80}") + + if not data: + print(" ❌ 데이터 없음") + return + + # 시간별로 그룹화 + hourly_dict = {} + for record in data: + timestamp = record.get('timestamp', '') + if timestamp.startswith(date_str): + hour = timestamp.split(' ')[1][:2] if ' ' in timestamp else '00' + kwh = record.get('generation_kwh', 0) + if hour not in hourly_dict: + hourly_dict[hour] = 0 + hourly_dict[hour] += kwh + + if not hourly_dict: + print(" ❌ 해당 날짜 데이터 없음") + return + + print(f" 총 {len(hourly_dict)}시간 데이터") + print(f"\n {'시간':<8} {'발전량(kWh)':<15}") + print(f" {'-'*25}") + + total = 0 + for hour in sorted(hourly_dict.keys()): + kwh = hourly_dict[hour] + total += kwh + print(f" {hour}:00 {kwh:>10.2f}") + + print(f" {'-'*25}") + print(f" {'합계':<8} {total:>10.2f}") + + +def format_daily_data(data, year_month, plant_name): + """일별 데이터 포맷팅""" + print(f"\n{'='*80}") + print(f"[{plant_name}] 일별 데이터: {year_month}") + print(f"{'='*80}") + + if not data: + print(" ❌ 데이터 없음") + return + + # 해당 월의 데이터만 필터링 + monthly_data = [d for d in data if d.get('date', 
'').startswith(year_month)] + + if not monthly_data: + print(" ❌ 해당 월 데이터 없음") + return + + print(f" 총 {len(monthly_data)}일 데이터") + print(f"\n {'날짜':<15} {'발전량(kWh)':<15}") + print(f" {'-'*30}") + + total = 0 + for record in sorted(monthly_data, key=lambda x: x.get('date', '')): + date = record.get('date', '') + kwh = record.get('generation_kwh', 0) + total += kwh + print(f" {date:<15} {kwh:>10.2f}") + + print(f" {'-'*30}") + print(f" {'합계':<15} {total:>10.2f}") + + +def format_monthly_data(data, year, plant_name): + """월별 데이터 포맷팅""" + print(f"\n{'='*80}") + print(f"[{plant_name}] 월별 데이터: {year}년") + print(f"{'='*80}") + + if not data: + print(" ❌ 데이터 없음") + return + + # 해당 연도의 데이터만 필터링 + yearly_data = [d for d in data if d.get('month', '').startswith(year)] + + if not yearly_data: + print(" ❌ 해당 연도 데이터 없음") + return + + print(f" 총 {len(yearly_data)}개월 데이터") + print(f"\n {'월':<10} {'발전량(kWh)':<15}") + print(f" {'-'*25}") + + total = 0 + for record in sorted(yearly_data, key=lambda x: x.get('month', '')): + month = record.get('month', '') + kwh = record.get('generation_kwh', 0) + total += kwh + print(f" {month:<10} {kwh:>10.2f}") + + print(f" {'-'*25}") + print(f" {'합계':<10} {total:>10.2f}") + if len(yearly_data) > 0: + print(f" {'평균':<10} {total/len(yearly_data):>10.2f}") + + +def verify_plant(plant_config, crawler_module): + """개별 발전소 데이터 검증""" + plant_name = plant_config['name'] + + print(f"\n{'#'*80}") + print(f"# {plant_name}") + print(f"{'#'*80}") + + try: + # 1. 시간별 데이터: 2025-05-10, 2024-10-20 + print(f"\n[1/6] 시간별 데이터 수집 중...") + + hourly_2025 = crawler_module.fetch_history_hourly(plant_config, '2025-05-10', '2025-05-10') + format_hourly_data(hourly_2025, '2025-05-10', plant_name) + + hourly_2024 = crawler_module.fetch_history_hourly(plant_config, '2024-10-20', '2024-10-20') + format_hourly_data(hourly_2024, '2024-10-20', plant_name) + + # 2. 
일별 데이터: 2025-05, 2024-07 + print(f"\n[2/6] 일별 데이터 수집 중...") + + daily_2025 = crawler_module.fetch_history_daily(plant_config, '2025-05-01', '2025-05-31') + format_daily_data(daily_2025, '2025-05', plant_name) + + daily_2024 = crawler_module.fetch_history_daily(plant_config, '2024-07-01', '2024-07-31') + format_daily_data(daily_2024, '2024-07', plant_name) + + # 3. 월별 데이터: 2024년, 2025년 + print(f"\n[3/6] 월별 데이터 수집 중...") + + monthly_2025 = crawler_module.fetch_history_monthly(plant_config, '2025-01', '2025-12') + format_monthly_data(monthly_2025, '2025', plant_name) + + monthly_2024 = crawler_module.fetch_history_monthly(plant_config, '2024-01', '2024-12') + format_monthly_data(monthly_2024, '2024', plant_name) + + print(f"\n>>> {plant_name} 검증 완료") + + except Exception as e: + print(f"\n ❌ 오류 발생: {str(e)}") + import traceback + traceback.print_exc() + + +def main(): + """메인 함수""" + print("\n" + "="*80) + print(">>> 발전소 데이터 검증 스크립트 <<<") + print("="*80) + print(f"검증 일시: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") + print("\n[검증 대상]") + print(" - 시간별: 2025-05-10, 2024-10-20") + print(" - 일별: 2025년 5월, 2024년 7월") + print(" - 월별: 2025년, 2024년") + + # 테스트 대상 발전소 설정 + test_plants = [ + # NREMS 1,2호기 (분리) + ({'id': 'nrems-01', 'name': '1호기', 'type': 'nrems', + 'auth': {'pscode': 'duce2023072288'}, + 'options': {'is_split': True, 'unit_id': 1}, + 'system': SYSTEM_CONSTANTS['nrems']}, nrems), + + ({'id': 'nrems-02', 'name': '2호기', 'type': 'nrems', + 'auth': {'pscode': 'duce2023072288'}, + 'options': {'is_split': True, 'unit_id': 2}, + 'system': SYSTEM_CONSTANTS['nrems']}, nrems), + + # NREMS 3호기 + ({'id': 'nrems-03', 'name': '3호기', 'type': 'nrems', + 'auth': {'pscode': 'dc2023121086'}, + 'options': {}, + 'system': SYSTEM_CONSTANTS['nrems']}, nrems), + + # NREMS 4호기 + ({'id': 'nrems-04', 'name': '4호기', 'type': 'nrems', + 'auth': {'pscode': 'duce2023072269'}, + 'options': {}, + 'system': SYSTEM_CONSTANTS['nrems']}, nrems), + + # NREMS 9호기 + ({'id': 'nrems-09', 'name': 
'9호기', 'type': 'nrems', + 'auth': {'pscode': 'a2020061008'}, + 'options': {}, + 'system': SYSTEM_CONSTANTS['nrems']}, nrems), + + # KREMC 5호기 + ({'id': 'kremc-05', 'name': '5호기', 'type': 'kremc', + 'auth': {'user_id': '서대문도서관', 'password': 'sunhope5!'}, + 'options': {'cid': '10013000376', 'cityProvCode': '11', 'rgnCode': '11410', + 'dongCode': '1141011700', 'enso_type_code': '15001'}, + 'system': SYSTEM_CONSTANTS['kremc']}, kremc), + + # Sun-WMS 6호기 + ({'id': 'sunwms-06', 'name': '6호기', 'type': 'sun_wms', + 'auth': {'payload_id': 'kc0fXUW0LUm2wZa+2NQI0Q==', 'payload_pw': 'PGXjU6ib2mKYwtrh2i3fIQ=='}, + 'options': {}, + 'system': SYSTEM_CONSTANTS['sun_wms']}, sun_wms), + + # Hyundai 8호기 + ({'id': 'hyundai-08', 'name': '8호기', 'type': 'hyundai', + 'auth': {'user_id': 'epecoop', 'password': 'sunhope0419', 'site_id': 'M0494'}, + 'options': {}, + 'system': SYSTEM_CONSTANTS['hyundai']}, hyundai), + + # CMSolar 10호기 + ({'id': 'cmsolar-10', 'name': '10호기', 'type': 'cmsolar', + 'auth': {'login_id': 'smart3131', 'password': 'ehdrb!123', 'site_no': '834'}, + 'options': {}, + 'system': SYSTEM_CONSTANTS['cmsolar']}, cmsolar), + ] + + # 각 발전소 검증 + for plant_config, crawler_module in test_plants: + try: + verify_plant(plant_config, crawler_module) + except KeyboardInterrupt: + print("\n\n⚠️ 사용자 중단") + break + except Exception as e: + print(f"\n❌ {plant_config['name']} 검증 실패: {str(e)}") + import traceback + traceback.print_exc() + continue + + print("\n" + "="*80) + print(">>> 데이터 검증 완료 <<<") + print("="*80) + + +if __name__ == '__main__': + main()