Initial commit with fixed daily summary logic

This commit is contained in:
haneulai 2026-01-30 11:43:08 +09:00
commit 20ef587800
31 changed files with 7238 additions and 0 deletions

49
.gitignore vendored Normal file
View File

@ -0,0 +1,49 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
.pytest_cache/
.coverage
htmlcov/
.tox/
.nox/
.venv
venv/
ENV/
env.bak/
venv.bak/
# Environment Variables
.env
.env.local
# IDE
.vscode/
.idea/
# Custom
*.log
*.sqlite3
crawler_manager.db
temp_env/
tests/db_dump.csv
tests/results.csv
tests/*_log.txt

208
config.py Normal file
View File

@ -0,0 +1,208 @@
# ==========================================
# config.py - 다중 업체(Multi-Tenant) 설정 관리
# ==========================================
# ---------------------------------------------------------
# [시스템 상수] 각 크롤러 시스템의 URL 및 엔드포인트
# ---------------------------------------------------------
# Endpoint table for each monitoring system, keyed by the plant 'type' string
# used in COMPANIES. get_all_plants() copies the matching entry onto every
# plant under the 'system' key (an empty dict when the type is not listed).
# NOTE(review): several login/data URLs below use plain http:// — confirm
# whether the vendors offer HTTPS before sending credentials over them.
SYSTEM_CONSTANTS = {
    'nrems': {
        'api_url': 'http://www.nrems.co.kr/v2/local/proc/index_proc.php',
        'detail_url': 'http://www.nrems.co.kr/v2/local/comp/cp_inv.php',
        'inv_proc_url': 'http://www.nrems.co.kr/v2/local/proc/cp_inv_proc.php'
    },
    'kremc': {
        'login_url': 'https://kremc.kr/api/v2.2/login',
        'api_base': 'https://kremc.kr/api/v2.2',
        'enso_type': '15001'
    },
    'sun_wms': {
        'base_url': 'http://tb6.sun-wms.com',
        'login_url': 'http://tb6.sun-wms.com/public/main/login_chk.php',
        'data_url': 'http://tb6.sun-wms.com/public/main/realdata.php',
        'statics_url': 'http://tb6.sun-wms.com/public/statics/statics.php'
    },
    'hyundai': {
        'base_url': 'https://hs3.hyundai-es.co.kr',
        'login_path': '/hismart/login',
        'data_path': '/hismart/site/getSolraUnitedWork'
    },
    'cmsolar': {
        'base_url': 'http://www.cmsolar2.kr',
        'api_url': 'http://www.cmsolar2.kr',
        'login_url': 'http://www.cmsolar2.kr/login_ok.php',
        'data_url': 'http://www.cmsolar2.kr/plant/sub/report_ok.php'
    }
}
# ---------------------------------------------------------
# [업체 목록] 업체 > 발전소 계층 구조
# ---------------------------------------------------------
# Company -> plant hierarchy. Each plant dict carries its crawler 'type'
# (a key of SYSTEM_CONSTANTS), the 'auth' values for that system, free-form
# 'options', a 'start_date' string and 'capacity_kw'.
# NOTE(review): the 'auth' entries below are live-looking credentials committed
# to source control. The repository's .gitignore already excludes `.env`;
# consider loading these values from the environment instead and rotating them.
COMPANIES = [
    {
        'company_id': 'sunwind',
        'company_name': '태양과바람',
        'plants': [
            # NREMS family - units 1 and 2 (handled as a split pair)
            # The crawler assigns the ids 'nrems-01' and 'nrems-02' internally
            {
                'name': '1호기, 2호기',
                'display_name': 'SPLIT_1_2',
                'type': 'nrems',
                'auth': {
                    'pscode': 'duce2023072288'
                },
                'options': {
                    'is_split': True
                },
                'start_date': '2014-03-31',
                'capacity_kw': 100.0  # unit 1 50 kW + unit 2 50 kW
                # no 'id' here: assigned dynamically by the crawler (nrems-01, nrems-02)
            },
            # NREMS family - unit 3
            {
                'id': 'nrems-03',
                'name': '3호기',
                'type': 'nrems',
                'auth': {
                    'pscode': 'dc2023121086'
                },
                'options': {
                    'is_split': False
                },
                'start_date': '2015-12-22',
                'capacity_kw': 99.82
            },
            # NREMS family - unit 4
            {
                'id': 'nrems-04',
                'name': '4호기',
                'type': 'nrems',
                'auth': {
                    'pscode': 'dc2023121085'
                },
                'options': {
                    'is_split': False
                },
                'start_date': '2017-01-11',
                'capacity_kw': 88.2
            },
            # NREMS family - unit 9
            {
                'id': 'nrems-09',
                'name': '9호기',
                'type': 'nrems',
                'auth': {
                    'pscode': 'a2020061008'
                },
                'options': {
                    'is_split': False
                },
                'start_date': '2020-10-28',
                'capacity_kw': 99.12
            },
            # KREMC - unit 5
            {
                'id': 'kremc-05',
                'name': '5호기',
                'type': 'kremc',
                'auth': {
                    'user_id': '서대문도서관',
                    'password': 'sunhope5!'
                },
                'options': {
                    'cid': '10013000376',
                    'cityProvCode': '11',
                    'rgnCode': '11410',
                    'dongCode': '1141011700'
                },
                'start_date': '2018-06-28',
                'capacity_kw': 42.7
            },
            # Sun-WMS - unit 6
            {
                'id': 'sunwms-06',
                'name': '6호기',
                'type': 'sun_wms',
                'auth': {
                    'payload_id': 'kc0fXUW0LUm2wZa+2NQI0Q==',
                    'payload_pw': 'PGXjU6ib2mKYwtrh2i3fIQ=='
                },
                'options': {},
                'start_date': '2019-12-30',
                'capacity_kw': 49.9
            },
            # Hyundai - unit 8
            {
                'id': 'hyundai-08',
                'name': '8호기',
                'type': 'hyundai',
                'auth': {
                    'user_id': 'epecoop',
                    'password': 'sunhope0419',
                    'site_id': 'M0494'
                },
                'options': {},
                'start_date': '2020-02-06',
                'capacity_kw': 99.9
            },
            # CMSolar - unit 10
            {
                'id': 'cmsolar-10',
                'name': '10호기',
                'type': 'cmsolar',
                'auth': {
                    'login_id': 'sy7144',
                    'login_pw': 'sy7144',
                    'site_no': '834'
                },
                'options': {},
                'start_date': '2020-08-31',
                'capacity_kw': 31.5
            }
        ]
    }
]
# ---------------------------------------------------------
# [헬퍼 함수] 평탄화된 발전소 리스트 반환
# ---------------------------------------------------------
def get_all_plants():
    """Return every plant of every company as one flat list of dicts.

    Each entry combines the owning company's id/name with the plant's own
    fields (missing fields fall back to '' / {} / 0.0) and adds, under
    'system', the SYSTEM_CONSTANTS entry for the plant's type ({} if the type
    is not listed). 'display_name' falls back to the plant's 'name'.
    """
    flattened = []
    for company in COMPANIES:
        owner_fields = {
            'company_id': company.get('company_id', ''),
            'company_name': company.get('company_name', ''),
        }
        for plant in company.get('plants', []):
            kind = plant.get('type', '')
            name = plant.get('name', '')
            flattened.append({
                **owner_fields,
                'id': plant.get('id', ''),  # unique id used by the DB
                'name': name,
                'display_name': plant.get('display_name', name),
                'type': kind,
                'auth': plant.get('auth', {}),
                'options': plant.get('options', {}),
                'start_date': plant.get('start_date', ''),
                'capacity_kw': plant.get('capacity_kw', 0.0),
                'system': SYSTEM_CONSTANTS.get(kind, {}),
            })
    return flattened
def get_plants_by_company(company_id):
    """Return the flattened plant entries whose 'company_id' equals *company_id*."""
    matches = []
    for entry in get_all_plants():
        if entry['company_id'] == company_id:
            matches.append(entry)
    return matches
def get_plants_by_type(plant_type):
    """Return the flattened plant entries whose 'type' equals *plant_type*."""
    matches = []
    for entry in get_all_plants():
        if entry['type'] == plant_type:
            matches.append(entry)
    return matches

404
crawler_gui.py Normal file
View File

@ -0,0 +1,404 @@
import tkinter as tk
from tkinter import ttk, messagebox, scrolledtext
import threading
import subprocess
import sys
import os
import json
import sqlite3
from datetime import datetime
import time
# 프로젝트 루트 경로 추가
current_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(current_dir)
sys.path.append(project_root)
# 모듈 import 시도 (실패 시 예외처리)
try:
from config import get_all_plants
from crawler_manager import CrawlerManager
except ImportError:
# GUI 단독 실행 시 더미 데이터 사용 가능하도록
pass
class CrawlerControlPanel:
    """Tkinter admin dashboard for monitoring and manually driving the crawlers.

    The window shows a button bar for bulk actions, a tree with one row per
    plant grouped under its company, and a log pane that displays the output
    of the helper scripts launched as subprocesses.
    """

    # Treeview data columns as (column id, heading text, width in pixels).
    # Order matters: with show="tree headings" the tree column is '#0', so
    # "action" is display column '#7' and "history" is '#8'.
    _COLUMNS = (
        ("site_id", "ID", 80),
        ("name", "발전소명", 150),
        ("type", "타입", 80),
        ("status", "상태", 80),
        ("schedule", "스케줄", 100),
        ("last_run", "최근 실행", 140),
        ("action", "개별 제어", 80),
        ("history", "과거 데이터", 80),
    )
    _ACTION_COLUMN = "#7"
    _HISTORY_COLUMN = "#8"

    def __init__(self, root):
        """Configure the root window, load the data layer and build the UI.

        Args:
            root: the ``tk.Tk`` instance that hosts the panel.
        """
        self.root = root
        self.root.title("☀️ 태양광 발전 통합 관제 시스템 [관리자 모드]")
        self.root.geometry("1100x750")
        self.root.configure(bg="#f0f2f5")

        self.setup_styles()

        # The data layer is optional so the GUI can still open when the
        # project modules failed to import (NameError) or the DB cannot be
        # opened. The reason is kept and logged once the log pane exists,
        # instead of being swallowed by a bare ``except``.
        init_error = None
        try:
            self.manager = CrawlerManager(os.path.join(project_root, "crawler_manager.db"))
            self.plants = get_all_plants()
        except Exception as exc:
            self.manager = None
            self.plants = []
            init_error = exc

        self.create_layout()
        if init_error is not None:
            self.log(f"매니저 초기화 실패: {init_error}", "ERROR")

        # Initial table load.
        self.refresh_monitor()

    def setup_styles(self):
        """Register the ttk styles used by the header, cards, table and buttons."""
        style = ttk.Style()
        style.theme_use('clam')

        style.configure("Header.TLabel", font=("Malgun Gothic", 16, "bold"), background="#f0f2f5", foreground="#1e293b")
        style.configure("Section.TLabel", font=("Malgun Gothic", 12, "bold"), background="#f0f2f5", foreground="#334155")
        style.configure("Card.TFrame", background="#ffffff", relief="flat")

        # Table (Treeview) styles.
        style.configure(
            "Treeview",
            background="#ffffff",
            fieldbackground="#ffffff",
            font=("Malgun Gothic", 10),
            rowheight=30,
        )
        style.configure(
            "Treeview.Heading",
            font=("Malgun Gothic", 10, "bold"),
            background="#e2e8f0",
            foreground="#1e293b",
        )

        # Button styles.
        style.configure("Action.TButton", font=("Malgun Gothic", 10), padding=6)
        style.map("Action.TButton", background=[("active", "#dbeafe")])

    def create_layout(self):
        """Build the header, the left control/monitor pane and the right log pane."""
        # Header: title on the left, status indicator on the right.
        header_frame = ttk.Frame(self.root, padding="20 20 20 10")
        header_frame.pack(fill="x")
        ttk.Label(header_frame, text="⚡ SolorPower Crawler Control", style="Header.TLabel").pack(side="left")
        status_frame = ttk.Frame(header_frame)
        status_frame.pack(side="right")
        self.status_label = ttk.Label(status_frame, text="🟢 시스템 대기중", font=("Malgun Gothic", 10), foreground="green")
        self.status_label.pack()

        # Main content, split horizontally.
        main_paned = ttk.PanedWindow(self.root, orient="horizontal")
        main_paned.pack(fill="both", expand=True, padx=20, pady=10)
        left_frame = ttk.Frame(main_paned)   # plant list and controls
        main_paned.add(left_frame, weight=2)
        right_frame = ttk.Frame(main_paned)  # log viewer
        main_paned.add(right_frame, weight=1)

        # --- Left pane: 1. bulk control buttons ---
        control_frame = ttk.LabelFrame(left_frame, text="통합 제어", padding=15)
        control_frame.pack(fill="x", pady=(0, 15))
        btn_grid = ttk.Frame(control_frame)
        btn_grid.pack(fill="x")
        for label, handler in (
            ("▶ 전체 수집 시작", self.run_all_crawlers),
            ("🔄 새로고침", self.refresh_monitor),
            ("📊 통계 요약 실행", self.run_daily_summary),
        ):
            ttk.Button(btn_grid, text=label, command=handler, style="Action.TButton").pack(side="left", padx=5)

        # --- Left pane: 2. plant monitoring table ---
        table_frame = ttk.LabelFrame(left_frame, text="발전소 모니터링 현황", padding=10)
        table_frame.pack(fill="both", expand=True)
        columns = tuple(col_id for col_id, _, _ in self._COLUMNS)
        self.tree = ttk.Treeview(table_frame, columns=columns, show="tree headings", selectmode="browse")
        for col_id, heading, _ in self._COLUMNS:
            self.tree.heading(col_id, text=heading)
        for col_id, _, width in self._COLUMNS:
            self.tree.column(col_id, width=width)
        scrollbar = ttk.Scrollbar(table_frame, orient="vertical", command=self.tree.yview)
        self.tree.configure(yscroll=scrollbar.set)
        self.tree.pack(side="left", fill="both", expand=True)
        scrollbar.pack(side="right", fill="y")

        # Right-click context menu.
        self.context_menu = tk.Menu(self.root, tearoff=0)
        self.context_menu.add_command(label="▶ 이 사이트만 즉시 실행", command=self.run_selected_crawler)
        self.context_menu.add_command(label="📑 상세 로그 보기", command=self.show_site_logs)
        self.context_menu.add_separator()
        self.context_menu.add_command(label="🔄 학습 모드로 리셋", command=self.reset_learning_mode)

        # Event bindings.
        self.tree.bind("<ButtonRelease-1>", self.on_tree_click)
        self.tree.bind("<Button-3>", self.show_context_menu)
        self.tree.bind("<Double-1>", lambda e: self.run_selected_crawler())

        # --- Right pane: log viewer ---
        log_frame = ttk.LabelFrame(right_frame, text="실시간 시스템 로그", padding=10)
        log_frame.pack(fill="both", expand=True)
        self.log_text = scrolledtext.ScrolledText(log_frame, state='disabled', font=("Consolas", 9), bg="#1e293b", fg="#e2e8f0")
        self.log_text.pack(fill="both", expand=True)

        # One text tag per log level; the tag name doubles as the level name.
        for level, color in (
            ("INFO", "#60a5fa"),
            ("SUCCESS", "#4ade80"),
            ("ERROR", "#f87171"),
            ("WARNING", "#fbbf24"),
        ):
            self.log_text.tag_config(level, foreground=color)

    def log(self, message, level="INFO"):
        """Append a timestamped line to the log pane, coloured by *level*.

        Args:
            message: text to display.
            level: one of the tags configured in create_layout()
                ("INFO", "SUCCESS", "ERROR", "WARNING").
        """
        timestamp = datetime.now().strftime("%H:%M:%S")
        full_msg = f"[{timestamp}] {message}\n"
        # The widget is kept read-only; unlock it only for the insert.
        self.log_text.configure(state='normal')
        self.log_text.insert("end", full_msg, level)
        self.log_text.see("end")
        self.log_text.configure(state='disabled')

    def _on_ui_thread(self, callback, *args):
        """Schedule *callback(*args)* through the Tk event loop.

        Used by the worker thread so that multi-step widget updates such as
        log() run as a single event-loop callback, the same way the original
        ``finally`` block scheduled refresh_monitor().
        """
        self.root.after(0, callback, *args)

    def _set_status(self, text, color):
        """Update the header status label."""
        self.status_label.config(text=text, foreground=color)

    def refresh_monitor(self):
        """Rebuild the monitoring table from the plant config and the manager DB."""
        # Remove the existing rows.
        for item in self.tree.get_children():
            self.tree.delete(item)

        if not self.manager:
            self.log("DB 매니저 로드 실패", "ERROR")
            return

        # Row colour for optimized sites; configured once, not per row.
        self.tree.tag_configure("optimized", foreground="#059669")

        # Latest state per site from the manager DB.
        site_stats = {s['site_id']: s for s in self.manager.get_all_sites()}
        added_companies = set()  # company nodes already inserted

        for plant in self.plants:
            is_split = plant.get('options', {}).get('is_split', False)
            company_name = plant.get('company_name', '')
            plant_name = plant.get('name', '')

            # A split plant is shown as two fixed units (nrems-01 / nrems-02);
            # any other plant is shown once, and skipped if it has no id.
            sub_units = []
            if is_split:
                sub_units.append({'id': 'nrems-01', 'name': f'{company_name} 1호기', 'type': plant['type']})
                sub_units.append({'id': 'nrems-02', 'name': f'{company_name} 2호기', 'type': plant['type']})
            else:
                plant_id = plant.get('id', '')
                if plant_id:
                    sub_units.append({'id': plant_id, 'name': f'{company_name} {plant_name}', 'type': plant['type']})

            for unit in sub_units:
                site_id = unit['id']
                stat = site_stats.get(site_id, {})
                status_text = stat.get('status', 'UNREGISTERED')

                target_minute = stat.get('target_minute', -1)
                if target_minute is not None and target_minute >= 0:
                    schedule_text = f"매시 {target_minute}"
                else:
                    schedule_text = "학습중"

                last_run = stat.get('last_run', '-') or '-'
                if last_run != '-' and isinstance(last_run, str):
                    # ISO timestamp -> "YYYY-MM-DD HH:MM:SS" (drop fraction, 'T' -> space).
                    last_run = last_run.split('.')[0].replace('T', ' ')

                row_tag = "optimized" if status_text == 'OPTIMIZED' else "normal"

                # Create the company (group) node on first use.
                company_id = plant.get('company_id', 'unknown')
                if company_id not in added_companies:
                    self.tree.insert("", "end", iid=company_id, text=company_name, values=(
                        "", company_name, "GROUP", "", "", "", "", ""
                    ), open=True)
                    added_companies.add(company_id)

                # Plant row under its company node.
                self.tree.insert(company_id, "end", iid=site_id, values=(
                    site_id,
                    unit['name'],
                    unit['type'].upper(),
                    status_text,
                    schedule_text,
                    last_run,
                    "▶ 실행",
                    "📥 수집"
                ), tags=(row_tag,))

        self.log("모니터링 상태 갱신 완료 (계층형)", "INFO")

    def on_tree_click(self, event):
        """Handle a left click on the "action" or "history" cell of a plant row.

        Company (group) rows are ignored for both columns: their cells are
        empty and their iid is a company id, not a site id.
        """
        try:
            if self.tree.identify_region(event.x, event.y) != "cell":
                return
            col = self.tree.identify_column(event.x)
            item_id = self.tree.identify_row(event.y)
            if not item_id:
                return
            if col not in (self._ACTION_COLUMN, self._HISTORY_COLUMN):
                return
            if self.tree.parent(item_id) == "":
                return  # group node

            if col == self._ACTION_COLUMN:
                self.log(f"'{item_id}' 실행 요청", "INFO")
                # NOTE(review): assumes main.py accepts `--site <id>`; main.py
                # is not visible here — confirm.
                self.run_process_thread(["main.py", "--site", item_id], f"{item_id} 수집")
            else:
                if messagebox.askyesno("과거 데이터 수집", f"'{item_id}'의 과거 내역을 수집하시겠습니까?\n(시간별/일별/월별 전체)"):
                    self.run_process_thread(["fetch_history.py", item_id], f"{item_id} 히스토리 수집")
        except Exception as e:
            self.log(f"클릭 처리 중 오류: {e}", "ERROR")

    def show_context_menu(self, event):
        """Select the row under the pointer and open the right-click menu there."""
        item = self.tree.identify_row(event.y)
        if item:
            self.tree.selection_set(item)
            self.context_menu.post(event.x_root, event.y_root)

    def _resolve_python(self):
        """Return the interpreter used for child scripts.

        Prefers ``temp_env`` next to this file, then ``venv`` under the
        project root (Windows ``Scripts\\python.exe`` layout), and falls back
        to the interpreter running the GUI.
        """
        temp_env_python = os.path.join(current_dir, "temp_env", "Scripts", "python.exe")
        venv_python = os.path.join(project_root, "venv", "Scripts", "python.exe")
        if os.path.exists(temp_env_python):
            return temp_env_python
        if os.path.exists(venv_python):
            return venv_python
        return sys.executable

    def run_process_thread(self, cmd_list, description):
        """Run a project script in a subprocess on a background daemon thread.

        Args:
            cmd_list: script name plus arguments, e.g. ``["main.py", "--force"]``;
                resolved relative to this file's directory (the child's cwd).
            description: label used in the status bar and in log messages.

        The script's stdout/stderr are collected after it exits and copied to
        the log pane; the table is refreshed afterwards.
        """
        def task():
            self._on_ui_thread(self._set_status, f"{description} 중...", "orange")
            self._on_ui_thread(self.log, f"{description} 시작...", "INFO")
            try:
                full_cmd = [self._resolve_python()] + cmd_list
                # The pipes are decoded as UTF-8 below, so make the child
                # Python encode its stdout/stderr as UTF-8 too; otherwise it
                # uses the locale encoding when writing to a pipe.
                child_env = dict(os.environ, PYTHONIOENCODING="utf-8")
                process = subprocess.Popen(
                    full_cmd,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    cwd=current_dir,
                    text=True,
                    encoding='utf-8',
                    errors='replace',  # never fail on undecodable bytes
                    env=child_env,
                )
                stdout, stderr = process.communicate()

                if stdout:
                    for line in stdout.splitlines():
                        is_error = "Error" in line or "fail" in line.lower()
                        self._on_ui_thread(self.log, line, "ERROR" if is_error else "INFO")
                if stderr:
                    self._on_ui_thread(self.log, f"STDERR: {stderr}", "WARNING")

                if process.returncode == 0:
                    self._on_ui_thread(self.log, f"{description} 완료 ✅", "SUCCESS")
                else:
                    self._on_ui_thread(self.log, f"{description} 실패 (Exit Code: {process.returncode})", "ERROR")
            except Exception as e:
                self._on_ui_thread(self.log, f"실행 오류: {e}", "ERROR")
            finally:
                self._on_ui_thread(self.refresh_monitor)
                self._on_ui_thread(self._set_status, "🟢 시스템 대기중", "green")

        thread = threading.Thread(target=task)
        thread.daemon = True
        thread.start()

    def run_all_crawlers(self):
        """Ask for confirmation, then run ``main.py --force`` for every plant."""
        if messagebox.askyesno("확인", "모든 발전소 데이터를 강제로 수집하시겠습니까?"):
            self.run_process_thread(["main.py", "--force"], "전체 데이터 수집")

    def run_selected_crawler(self):
        """Run a crawl for the selected row (currently a full forced run).

        This handler still launches ``main.py --force`` for all plants and
        only labels the run with the selected id.
        NOTE(review): on_tree_click() already passes ``--site <id>``; if
        main.py supports that option this handler should probably use it too.
        """
        selected = self.tree.selection()
        if not selected:
            return
        site_id = selected[0]
        self.log(f"'{site_id}' 단일 실행 요청 (현재는 전체 실행으로 동작)", "WARNING")
        self.run_process_thread(["main.py", "--force"], f"'{site_id}' 데이터 수집")

    def run_daily_summary(self):
        """Run the daily statistics aggregation script."""
        self.run_process_thread(["daily_summary.py"], "일일 통계 집계")

    def show_site_logs(self):
        """Placeholder for the per-site log viewer; only logs a notice."""
        selected = self.tree.selection()
        if selected:
            site_id = selected[0]
            self.log(f"'{site_id}' 로그 조회 기능은 아직 구현되지 않았습니다.", "INFO")

    def reset_learning_mode(self):
        """Put the selected site back into LEARNING mode and refresh the table."""
        selected = self.tree.selection()
        if not selected:
            return
        if not self.manager:
            # The manager failed to load at start-up; nothing to reset against.
            self.log("DB 매니저 로드 실패", "ERROR")
            return
        site_id = selected[0]
        if self.manager.reset_to_learning(site_id):
            self.log(f"'{site_id}' 학습 모드로 리셋 완료", "SUCCESS")
            self.refresh_monitor()
        else:
            # No matching row was updated (e.g. a group row or an unregistered site).
            self.log(f"'{site_id}' 학습 모드 리셋 실패 (등록되지 않은 사이트)", "WARNING")
if __name__ == "__main__":
    # Script entry point: create the Tk root, attach the panel, run the loop.
    root = tk.Tk()
    # Optional window icon (left disabled):
    # try: root.iconbitmap("icon.ico")
    # except: pass
    app = CrawlerControlPanel(root)
    root.mainloop()

369
crawler_manager.py Normal file
View File

@ -0,0 +1,369 @@
# ==========================================
# crawler_manager.py - 크롤링 스케줄 최적화 미들웨어
# ==========================================
# NAS 리소스 절약을 위해 SQLite 기반으로 각 사이트의
# 업데이트 패턴을 학습하고 최적 시점에만 크롤링 실행
import sqlite3
from datetime import datetime, timedelta
from pathlib import Path
class CrawlerManager:
    """SQLite-backed gatekeeper that decides when each site may be crawled.

    Every site has one row in the ``site_rules`` table with a status:

    - ``LEARNING``: every crawl is allowed (the update pattern is still
      being learned).
    - ``OPTIMIZED``: crawls are allowed only inside the window that starts at
      the learned ``target_minute`` of each hour.
    """

    # Crawling is suspended from NIGHT_START_HOUR (inclusive) until
    # NIGHT_END_HOUR (exclusive).
    NIGHT_START_HOUR = 21
    NIGHT_END_HOUR = 5
    # Length, in minutes, of the hourly window for OPTIMIZED sites.
    WINDOW_MINUTES = 10

    _SITE_COLUMNS = ("site_id", "status", "target_minute", "start_date", "last_run")

    def __init__(self, db_path: str = None):
        """Remember the DB location and create the table if needed.

        Args:
            db_path: path of the SQLite file. Defaults to
                ``crawler_manager.db`` in the directory of this script.
        """
        if db_path is None:
            db_path = Path(__file__).parent / "crawler_manager.db"
        self.db_path = str(db_path)
        self._init_db()

    def _init_db(self):
        """Create the ``site_rules`` table when it does not exist yet."""
        self._execute("""
            CREATE TABLE IF NOT EXISTS site_rules (
                site_id TEXT PRIMARY KEY,
                status TEXT DEFAULT 'LEARNING',
                target_minute INTEGER DEFAULT -1,
                start_date TEXT,
                last_run TEXT
            )
        """)

    def _get_connection(self) -> sqlite3.Connection:
        """Open and return a new SQLite connection; the caller must close it."""
        return sqlite3.connect(self.db_path)

    def _execute(self, sql, params=()):
        """Run one statement in its own transaction and close the connection.

        ``with connection`` only commits or rolls back; it does not close the
        connection, so the close is done explicitly here.

        Returns:
            tuple: ``(rows, rowcount)`` — the fetched rows (empty for
            non-SELECT statements) and the cursor's rowcount.
        """
        conn = self._get_connection()
        try:
            with conn:
                cursor = conn.execute(sql, params)
                return cursor.fetchall(), cursor.rowcount
        finally:
            conn.close()

    @classmethod
    def _row_to_dict(cls, row) -> dict:
        """Map a full ``site_rules`` row onto its column names."""
        return dict(zip(cls._SITE_COLUMNS, row))

    def register_site(self, site_id: str) -> bool:
        """Register a new site in LEARNING mode.

        Args:
            site_id: site identifier (e.g. 'nrems-01').

        Returns:
            bool: True if the site was newly registered, False if it already
            existed.
        """
        today = datetime.now().strftime("%Y-%m-%d")
        # INSERT OR IGNORE makes "check, then insert" a single atomic statement.
        _, inserted = self._execute("""
            INSERT OR IGNORE INTO site_rules (site_id, status, target_minute, start_date, last_run)
            VALUES (?, 'LEARNING', -1, ?, NULL)
        """, (site_id, today))
        if inserted <= 0:
            return False
        print(f" 📝 [CrawlerManager] '{site_id}' 신규 등록 (LEARNING 모드)")
        return True

    def should_run(self, site_id: str, now: datetime = None) -> bool:
        """Decide whether the site should be crawled at this moment.

        Args:
            site_id: site identifier.
            now: moment to evaluate; defaults to ``datetime.now()``. Exposed
                so the decision can be checked deterministically.

        Returns:
            bool: True if crawling is allowed.
        """
        if now is None:
            now = datetime.now()

        # Night mode: no crawling at all.
        if now.hour >= self.NIGHT_START_HOUR or now.hour < self.NIGHT_END_HOUR:
            return False

        rows, _ = self._execute("""
            SELECT status, target_minute, last_run
            FROM site_rules
            WHERE site_id = ?
        """, (site_id,))

        # Unknown site: register it and allow the first crawl.
        if not rows:
            self.register_site(site_id)
            return True

        status, target_minute, last_run = rows[0]

        # LEARNING, any other status, or no learned minute yet: always allowed.
        if status != "OPTIMIZED" or target_minute is None or target_minute < 0:
            return True

        # Allowed only during the WINDOW_MINUTES that start at target_minute,
        # e.g. target 15 -> minutes 15..24, target 55 -> 55..59 and 0..4.
        # The modulo handles windows that cross the hour boundary.
        minutes_into_window = (now.minute - target_minute) % 60
        if minutes_into_window >= self.WINDOW_MINUTES:
            return False

        # Duplicate guard: skip only when the site already ran within the
        # window length, i.e. in the current window. A full-hour guard makes
        # the run time drift later each hour (the run is recorded a little
        # after the crawl starts) until it leaves the window and a whole hour
        # is skipped.
        if last_run:
            try:
                since_last_run = now - datetime.fromisoformat(last_run)
            except (ValueError, TypeError):
                # Unparseable timestamp: do not block the crawl.
                return True
            if since_last_run < timedelta(minutes=self.WINDOW_MINUTES):
                return False

        return True

    def update_optimization(self, site_id: str, detected_minute: int) -> bool:
        """Switch a site to OPTIMIZED with the detected update minute.

        Args:
            site_id: site identifier.
            detected_minute: minute of the hour (0-59) at which the site's
                data was observed to update.

        Returns:
            bool: True if a row was updated; False for an invalid minute or
            an unknown site.
        """
        if not 0 <= detected_minute <= 59:
            print(f" ⚠️ [CrawlerManager] 유효하지 않은 minute 값: {detected_minute}")
            return False

        _, updated = self._execute("""
            UPDATE site_rules
            SET status = 'OPTIMIZED', target_minute = ?
            WHERE site_id = ?
        """, (detected_minute, site_id))

        if updated > 0:
            print(f" ✅ [CrawlerManager] '{site_id}' → OPTIMIZED (매시 {detected_minute}분)")
            return True
        print(f" ⚠️ [CrawlerManager] '{site_id}' 사이트를 찾을 수 없음")
        return False

    def record_run(self, site_id: str):
        """Store the current time as the site's last successful run.

        Args:
            site_id: site identifier.
        """
        now_str = datetime.now().isoformat()
        self._execute("""
            UPDATE site_rules
            SET last_run = ?
            WHERE site_id = ?
        """, (now_str, site_id))

    def get_site_info(self, site_id: str) -> dict:
        """Look up one site (for debugging/monitoring).

        Args:
            site_id: site identifier.

        Returns:
            dict with the keys site_id, status, target_minute, start_date and
            last_run, or None when the site is not registered.
        """
        rows, _ = self._execute("""
            SELECT site_id, status, target_minute, start_date, last_run
            FROM site_rules
            WHERE site_id = ?
        """, (site_id,))
        if rows:
            return self._row_to_dict(rows[0])
        return None

    def get_all_sites(self) -> list:
        """Return every registered site as a list of dicts, ordered by site_id."""
        rows, _ = self._execute("""
            SELECT site_id, status, target_minute, start_date, last_run
            FROM site_rules
            ORDER BY site_id
        """)
        return [self._row_to_dict(row) for row in rows]

    def reset_to_learning(self, site_id: str) -> bool:
        """Put a site back into LEARNING mode and clear its target minute.

        Args:
            site_id: site identifier.

        Returns:
            bool: True if a row was updated, False if the site is unknown.
        """
        _, updated = self._execute("""
            UPDATE site_rules
            SET status = 'LEARNING', target_minute = -1
            WHERE site_id = ?
        """, (site_id,))
        return updated > 0
# ==========================================
# Example Usage (main.py에서의 활용 예시)
# ==========================================
#
# from crawler_manager import CrawlerManager
# from crawlers import get_crawler
# from config import get_all_plants
#
# def main():
# # 매니저 초기화
# manager = CrawlerManager()
#
# # 모든 발전소 순회
# for plant in get_all_plants():
# site_id = plant.get('id', '')
#
# if not site_id:
# continue
#
# # 1. 사이트 등록 (최초 1회)
# manager.register_site(site_id)
#
# # 2. 실행 여부 확인
# if not manager.should_run(site_id):
# print(f" ⏭️ {site_id} 스킵 (최적화 윈도우 외)")
# continue
#
# # 3. 크롤링 실행
# try:
# crawler_func = get_crawler(plant['type'])
# data = crawler_func(plant)
#
# if data:
# # 4. 실행 기록
# manager.record_run(site_id)
#
# # 5. (옵션) 패턴 분석 후 최적화
# # 예: 데이터가 항상 매시 10분에 갱신된다면
# # manager.update_optimization(site_id, 10)
#
# except Exception as e:
# print(f" ❌ {site_id} 오류: {e}")
#
# if __name__ == "__main__":
# main()
#
# ==========================================
# Cron 예시 (5분마다 실행)
# ==========================================
# */5 * * * * cd /volume1/dev/SolorPower/crawler && \
# /volume1/dev/SolorPower/crawler/venv/bin/python main.py >> cron.log 2>&1
#
# - LEARNING 사이트는 5분마다 크롤링 (패턴 학습)
# - OPTIMIZED 사이트는 학습된 시점 직후 10분 윈도우에서만 크롤링
# - 야간(21시~05시)에는 모든 크롤링 중지
# ==========================================
if __name__ == "__main__":
    # Manual smoke test: registers a few sites in the default DB, prints
    # their state, switches two of them to OPTIMIZED and records one run.
    def _print_sites(mgr):
        for entry in mgr.get_all_sites():
            print(f" {entry['site_id']}: {entry['status']} (target: {entry['target_minute']}분)")

    manager = CrawlerManager()
    print("=== CrawlerManager 테스트 ===\n")

    # Register the sample sites.
    test_sites = ["nrems-01", "nrems-02", "kremc-05"]
    for site_id in test_sites:
        manager.register_site(site_id)

    # Current state.
    print("\n[등록된 사이트]")
    _print_sites(manager)

    # should_run check.
    print("\n[should_run 테스트]")
    for site_id in test_sites:
        verdict = '✅ 실행' if manager.should_run(site_id) else '⏭️ 스킵'
        print(f" {site_id}: {verdict}")

    # Apply optimization.
    print("\n[최적화 적용]")
    for site_id, minute in (("nrems-01", 15), ("kremc-05", 30)):
        manager.update_optimization(site_id, minute)

    # State after optimization.
    print("\n[최적화 후 상태]")
    _print_sites(manager)

    # Record a run.
    manager.record_run("nrems-01")
    print("\n=== 테스트 완료 ===")

110
crawler_structure.md Normal file
View File

@ -0,0 +1,110 @@
# Crawler 시스템 파일 구조 및 역할 정의
이 문서는 `crawler` 폴더 내의 각 파일과 모듈의 역할, 기능, 그리고 상호 작용 방식에 대해 자세히 설명합니다.
## 📁 디렉토리 구조 및 핵심 파일 요약
| 파일명 | 분류 | 핵심 역할 |
|---|---|---|
| **main.py** | Core | 크롤러 시스템의 메인 진입점. 전체 수집 프로세스 조율 |
| **config.py** | Config | 발전소 정보, 비밀번호, 시스템 상수 등 설정 관리 |
| **database.py** | Data | Supabase 데이터베이스 연결 및 CRUD 처리 |
| **crawler_manager.py** | Logic | 지능형 스케줄링 관리 (업데이트 패턴 학습 및 최적화) |
| **crawler_gui.py** | UI | 관리자용 대시보드 (윈도우 GUI), 모니터링 및 수동 제어 |
| **daily_summary.py** | Batch | 일일 발전 통계 집계 및 요약 테이블 저장 |
| **fetch_history.py** | Tool | 과거 데이터(Hourly, Daily) 수집 도구 |
| **sync_plants.py** | Tool | 발전소 메타 정보를 DB와 동기화 |
| **verify_data.py** | Test | 수집된 데이터의 무결성 검증 및 테스트 스크립트 |
---
## 📄 파일별 상세 역할 분석
### 1. 핵심 시스템 (Core System)
#### `main.py`
* **역할**: 전체 크롤링 시스템의 오케스트레이터(Orchestrator).
* **주요 기능**:
* `integrated_monitoring()` 함수를 통해 정의된 모든 발전소를 순회합니다.
* `CrawlerManager`를 통해 현재 시점에 실행해야 할 크롤러를 선별합니다.
* 각 발전소 타입에 맞는 크롤러 함수(`crawlers` 패키지)를 동적으로 호출합니다.
* 수집된 실시간 데이터를 콘솔에 출력하고, `database.py`를 통해 DB에 저장합니다.
* 발전량이 0인 경우 등 간단한 이상 감지 로직을 수행합니다.
* **실행 방식**: 스케줄러(Cron 등)에 의해 주기적으로 실행되거나, GUI에서 호출됩니다. `--force` 옵션으로 강제 실행 가능합니다.
#### `config.py`
* **역할**: 시스템 설정 및 발전소 정보의 단일 진실 공급원(Single Source of Truth).
* **주요 기능**:
* `SYSTEM_CONSTANTS`: 각 크롤러 시스템(NREMS, KREMC 등)의 URL 및 API 엔드포인트 정의.
* `COMPANIES`: 업체 및 산하 발전소들의 계층 구조, 인증 정보(ID/PW), 용량(`capacity_kw`) 등을 Python 딕셔너리/리스트 구조로 관리.
* `get_all_plants()`: 계층화된 데이터를 크롤러가 사용하기 쉬운 평탄화(Flat)된 리스트로 변환하여 제공.
* **특이 사항**: 보안이 필요한 인증 정보가 포함되어 있어 관리에 주의가 필요합니다. 1, 2호기와 같이 하나의 계정으로 분리되는 발전소(`is_split`) 설정도 이곳에서 관리됩니다.
#### `crawler_manager.py` (Smart Scheduler)
* **역할**: 비효율적인 반복 호출을 줄이고 NAS 리소스를 절약하기 위한 미들웨어.
* **주요 기능**:
* **SQLite 기반 상태 관리**: `crawler_manager.db` 로컬 파일에 각 발전소의 상태 저장.
* **학습 모드(LEARNING)**: 초기에는 자주 실행하며 발전소 서버의 데이터 업데이트 주기 패턴을 학습.
* **최적화 모드(OPTIMIZED)**: 학습된 업데이트 시점(예: 매시 15분)부터 10분간의 윈도우(Window)(예: 15~24분)에만 크롤링을 허용.
* 야간(21시~05시) 크롤링 자동 차단 로직 포함.
### 2. 데이터 관리 (Data Management)
#### `database.py`
* **역할**: Supabase 클라우드 데이터베이스와의 인터페이스.
* **주요 기능**:
* Supabase 클라이언트 싱글턴 연결 관리.
* `save_to_supabase()`: 실시간 발전 데이터(`solar_logs`) 저장. 일일 통계(`daily_stats`) 단순 Upsert 처리.
* `save_history()`: 과거 내역 저장 시 사용되며, `solar_logs`(Hourly), `daily_stats`(Daily), `monthly_stats`(Monthly) 등 데이터 타입에 따라 적절한 테이블에 저장하고, 월별 통계 자동 갱신 트리거 로직을 포함합니다.
#### `daily_summary.py`
* **역할**: 수집된 로그 데이터를 기반으로 일일 최종 통계를 확정 짓는 배치 스크립트.
* **주요 기능**:
* 특정 날짜의 `solar_logs`를 모두 조회하여 발전소별 총 발전량, 피크 출력, 발전 시간(이용률)을 계산.
* 계산된 확정 데이터를 `daily_stats` 테이블에 저장.
* 주로 하루가 끝나는 시점이나 다음 날 새벽에 실행하여 데이터 정확도를 보정합니다.
### 3. 사용자 인터페이스 (User Interface)
#### `crawler_gui.py`
* **역할**: 윈도우 환경에서 크롤러 상태를 시각적으로 모니터링하고 제어하는 관리자 도구.
* **주요 기능**:
* `tkinter` 기반의 GUI 제공.
* 발전소별 현재 상태(대기, 실행중, 최적화 여부), 마지막 실행 시간 등을 트리 뷰(Tree View)로 표시.
* 개별/전체 크롤링 강제 실행, 히스토리 수집 명령, 학습 모드 리셋 등의 제어 기능 제공.
* 실시간 로그 창을 통해 백그라운드 프로세스(`subprocess`)의 실행 결과를 출력.
### 4. 도구 및 유틸리티 (Tools & Utilities)
#### `fetch_history.py`
* **역할**: 누락된 데이터나 초기 구축 시 과거 데이터를 수집하기 위한 스크립트.
* **주요 기능**:
* 특정 발전소 ID를 인자로 받아 과거 데이터를 조회.
* 각 크롤러 모듈(`crawlers/`)에 구현된 `fetch_history_hourly`, `fetch_history_daily` 등을 호출.
* 시간별(Hourly), 일별(Daily) 데이터를 수집하여 DB에 적재.
#### `sync_plants.py`
* **역할**: 로컬 코드(`config.py`)와 원격 DB(`plants` 테이블) 간의 메타 데이터 동기화.
* **주요 기능**:
* 새로운 발전소가 추가되거나 이름/용량이 변경되었을 때, `config.py`의 내용을 DB의 마스터 테이블에 반영(Upsert).
* NREMS 1, 2호기와 같이 논리적으로 분리해야 하는 발전소를 별도 레코드로 DB에 생성.
#### `verify_data.py`
* **역할**: 크롤링 로직 검증 및 데이터 무결성 테스트.
* **주요 기능**:
* 각 발전소별로 샘플 날짜(과거/현재)를 지정하여 실제 데이터를 가져와 봅니다.
* 시간별, 일별, 월별 합계가 논리적으로 맞는지 검증 포맷을 출력하여 개발자가 확인하기 쉽게 돕습니다.
### 5. 하위 폴더
#### `crawlers/` (폴더)
* **역할**: 실제 사이트별 크롤링 로직이 구현된 모듈들의 집합.
* **구성**:
* `nrems.py`, `kremc.py`, `hyundai.py`, `sun_wms.py`, `cmsolar.py` 등 사이트 타입별로 파일이 존재.
* 각 모듈은 공통적으로 `fetch_data()` (실시간, `crawlers/__init__.py`의 `CRAWLER_MAP`에 등록됨), `fetch_history_*` (과거 내역) 등의 인터페이스를 구현해야 함.
#### `venv/`, `temp_env/` (폴더)
* **역할**: Python 가상 환경 폴더. 프로젝트 실행에 필요한 라이브러리(`requests`, `pandas`, `supabase` 등)가 설치됨.
---
*작성일: 2026-01-28*

20
crawlers/__init__.py Normal file
View File

@ -0,0 +1,20 @@
# crawlers 패키지 초기화
from .nrems import fetch_data as fetch_nrems
from .kremc import fetch_data as fetch_kremc
from .sun_wms import fetch_data as fetch_sunwms
from .hyundai import fetch_data as fetch_hyundai
from .cmsolar import fetch_data as fetch_cmsolar
# 크롤러 타입별 매핑
# Maps a crawler type string to the fetch_data function imported from the
# module of the same system; looked up by get_crawler().
CRAWLER_MAP = {
    'nrems': fetch_nrems,
    'kremc': fetch_kremc,
    'sun_wms': fetch_sunwms,
    'hyundai': fetch_hyundai,
    'cmsolar': fetch_cmsolar
}
def get_crawler(crawler_type):
    """Return the fetch function registered for *crawler_type*, or None if unknown."""
    try:
        return CRAWLER_MAP[crawler_type]
    except KeyError:
        return None

100
crawlers/base.py Normal file
View File

@ -0,0 +1,100 @@
# ==========================================
# crawlers/base.py - 크롤러 공통 유틸리티
# ==========================================
import requests
def safe_float(value):
    """Convert *value* to float without raising.

    Thousands separators (',') are stripped before conversion. None and
    anything that cannot be parsed yield 0.0.
    """
    fallback = 0.0
    if value is None:
        return fallback
    try:
        cleaned = str(value).replace(',', '')
        return float(cleaned)
    except (ValueError, TypeError):
        return fallback
def create_session():
    """Create and return a new ``requests.Session`` with default settings."""
    return requests.Session()
def get_default_headers():
    """Return a fresh dict with the default User-Agent and Accept headers."""
    headers = {}
    headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36'
    headers['Accept'] = 'application/json, text/plain, */*'
    return headers
def determine_status(current_kw):
    """Return the status label for a power reading: normal if above 0, else standby."""
    return "🟢 정상" if current_kw > 0 else "💤 대기"
def format_result(name, kw, today, plant_id, status=None):
    """Build the common result dict shared by the crawlers.

    When *status* is None it is derived from *kw* via determine_status().
    The returned keys are 'name', 'kw', 'today', 'id' and 'status'.
    """
    resolved_status = determine_status(kw) if status is None else status
    return dict(
        name=name,
        kw=kw,
        today=today,
        id=plant_id,
        status=resolved_status,
    )
def validate_data_quality(data_list, value_key='generation_kwh', max_duplicate_ratio=0.8):
    """Run basic plausibility checks on a list of records.

    Args:
        data_list: list of dicts; may be None or empty.
        value_key: key whose value is checked in every record.
        max_duplicate_ratio: the data is rejected when the share of
            consecutive equal values reaches this ratio (default 0.8).

    Returns:
        dict: {
            'is_valid': bool,          # False if empty, all zero, or too repetitive
            'warnings': list,          # human-readable reasons
            'all_zero': bool,
            'duplicate_ratio': float   # equal neighbouring pairs / total pairs
        }
    """
    if not data_list:
        return {
            'is_valid': False,
            'warnings': ['데이터 없음'],
            'all_zero': True,
            'duplicate_ratio': 0.0
        }

    warnings = []
    values = [safe_float(item.get(value_key, 0)) for item in data_list]

    # Every value is zero: probably not real data.
    all_zero = all(v == 0 for v in values)
    if all_zero:
        warnings.append('모든 값이 0 - 실제 데이터가 아닐 가능성')

    # Share of neighbouring pairs that hold the same value.
    if len(values) > 1:
        duplicates = sum(1 for prev, cur in zip(values, values[1:]) if prev == cur)
        duplicate_ratio = duplicates / (len(values) - 1)
    else:
        duplicate_ratio = 0.0

    # Use one threshold test for both the warning and the verdict, so a ratio
    # of exactly max_duplicate_ratio no longer yields an invalid result with
    # no explanation.
    too_repetitive = duplicate_ratio >= max_duplicate_ratio
    if too_repetitive:
        warnings.append(f'연속 중복 비율 {duplicate_ratio*100:.1f}% - 실제 데이터가 아닐 가능성')

    return {
        'is_valid': not all_zero and not too_repetitive,
        'warnings': warnings,
        'all_zero': all_zero,
        'duplicate_ratio': duplicate_ratio
    }

512
crawlers/cmsolar.py Normal file
View File

@ -0,0 +1,512 @@
# ==========================================
# crawlers/cmsolar.py - CMSolar 크롤러 (10호기)
# HTML 테이블 파싱 방식
# ==========================================
import requests
import re
from .base import create_session, safe_float
def fetch_data(plant_info):
    """
    Collect the current reading of a CMSolar plant.

    Logs in, selects the site, then reads the JSON endpoint
    ``{base_url}/plant/sub/idx_ok.php?mode=getPlant``.

    Args:
        plant_info: dict; the keys read are ``id``, ``name``, ``company_name``,
            ``auth`` (``login_id``, ``login_pw``, ``site_no``) and ``system``
            (``login_url``, ``base_url``). Every key has a default.

    Returns:
        list: ``[{'id', 'name', 'kw', 'today', 'status'}]`` on success, or
        ``[]`` when login, the request, or parsing fails.
    """
    plant_id = plant_info.get('id', 'cmsolar-10')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    company_name = plant_info.get('company_name', '함안햇빛발전소')
    plant_name = plant_info.get('name', '10호기')

    site_no = auth.get('site_no', '')
    login_url = system.get('login_url', '')
    base_url = system.get('base_url', 'http://www.cmsolar2.kr')

    # requests waits indefinitely without a timeout; bound every call.
    request_timeout = 10

    session = create_session()
    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded'
    }
    login_data = {
        'login_id': auth.get('login_id', ''),
        'login_pw': auth.get('login_pw', ''),
        'site_no': site_no
    }

    # Login, then site selection (required before idx_ok.php is queried)
    try:
        res = session.post(login_url, data=login_data, headers=headers,
                           timeout=request_timeout)
        if res.status_code != 200:
            return []
        session.get(f"{base_url}/change.php?site={site_no}", headers=headers,
                    timeout=request_timeout)
    except Exception as e:
        print(f"{plant_name} 접속 에러: {e}")
        return []

    # Data request (JSON endpoint)
    target_url = f"{base_url}/plant/sub/idx_ok.php?mode=getPlant"
    try:
        res = session.get(target_url, headers=headers, timeout=request_timeout)
        if res.status_code != 200:
            return []

        # Fall back to UTF-8 only when requests could not determine an encoding.
        if res.encoding is None:
            res.encoding = 'utf-8'
        data = res.json()

        # Expected structure: [{"plant": {...}}]
        if not (isinstance(data, list) and len(data) > 0):
            print(f"{plant_name} 데이터 형식 오류: {data}")
            return []

        plant_data = data[0].get('plant', {})

        # Unit conversion: W -> kW (values are divided by 1000)
        curr_kw = safe_float(plant_data.get('now', 0)) / 1000.0
        today_kwh = safe_float(plant_data.get('today', 0)) / 1000.0

        # Status follows the inverter error flag only; 0 kW is not treated as
        # a fault because it is normal at night.
        is_error = int(plant_data.get('inv_error', 0))
        status = "🟢 정상" if is_error == 0 else "🔴 점검/고장"

        return [{
            'id': plant_id,
            'name': f'{company_name} {plant_name}',
            'kw': curr_kw,
            'today': today_kwh,
            'status': status
        }]
    except Exception as e:
        print(f"{plant_name} 에러: {e}")
        return []
def fetch_history_hourly(plant_info, start_date, end_date):
    """
    Collect hourly history for a CMSolar plant.

    Endpoint: ``/plant/sub/report_ok.php`` (responds with an HTML table).
    Query: ``mode=getPowers&type=daily&device=total&start=YYYY-MM-DD&money=``;
    one request is sent per day in the range.

    Args:
        plant_info: dict; reads ``id``, ``name``, ``auth`` (``login_id``,
            ``login_pw``, ``site_no``) and ``system`` (``login_url``,
            ``api_url``).
        start_date: str, first day (YYYY-MM-DD), inclusive.
        end_date: str, last day (YYYY-MM-DD), inclusive.

    Returns:
        list: dicts with ``plant_id``, ``timestamp`` ("YYYY-MM-DD HH:00:00"),
        ``generation_kwh`` and ``current_kw`` (always 0). Empty when login or
        site selection fails.

    Raises:
        ValueError: if ``start_date`` or ``end_date`` is not YYYY-MM-DD.
    """
    from datetime import datetime, timedelta

    results = []
    plant_id = plant_info.get('id', 'cmsolar-10')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '10호기')
    login_id = auth.get('login_id', '')
    login_pw = auth.get('login_pw', '')
    site_no = auth.get('site_no', '')
    login_url = system.get('login_url', '')

    # Actual data endpoint
    base_url = system.get('api_url', 'http://www.cmsolar2.kr')
    data_url = f"{base_url}/plant/sub/report_ok.php"

    # Same bound for every request so a stalled server cannot hang the crawl.
    request_timeout = 10

    # Patterns hoisted out of the per-day loop. The value column accepts
    # thousands separators, matching the daily/monthly parsers; safe_float
    # strips the commas.
    tbody_re = re.compile(r'<tbody>(.*?)</tbody>', re.DOTALL)
    row_re = re.compile(r'<tr[^>]*>\s*<td>(\d+)</td>\s*<td>([\d.,]+)</td>')

    session = create_session()
    print(f"\n{'='*60}")
    print(f"[CMSolar History] {plant_name} ({start_date} ~ {end_date})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }
    login_data = {
        'login_id': login_id,
        'login_pw': login_pw,
        'site_no': site_no
    }

    try:
        res = session.post(login_url, data=login_data, headers=headers,
                           timeout=request_timeout)
        if res.status_code == 200:
            print(" ✓ Login successful")
        else:
            print(" ✗ Login failed")
            return results
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    # Site selection (required)
    try:
        change_url = f"{base_url}/change.php?site={site_no}"
        session.get(change_url, headers=headers, timeout=request_timeout)
        print(" ✓ Site selected")
    except Exception as e:
        print(f" ✗ Site selection error: {e}")
        return results

    # Iterate over each day in the range
    current_date = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')
    while current_date <= end_dt:
        date_str = current_date.strftime('%Y-%m-%d')
        # type=daily returns one day's worth of hourly rows
        params = {
            'mode': 'getPowers',
            'type': 'daily',
            'device': 'total',
            'start': date_str,
            'money': ''
        }
        try:
            res = session.get(data_url, params=params, headers=headers,
                              timeout=request_timeout)
            res.encoding = 'utf-8'
            if res.status_code != 200:
                print(f" ✗ HTTP {res.status_code}")
            else:
                tbody_match = tbody_re.search(res.text)
                if tbody_match is None:
                    print(f" ⚠ No tbody found for {date_str}")
                else:
                    # Each row looks like <tr class="odd"><td>9</td><td>3.0</td>...
                    matches = row_re.findall(tbody_match.group(1))
                    if not matches:
                        print(f" ⚠ No data for {date_str}")
                    else:
                        print(f" ✓ Found {len(matches)} hourly records for {date_str}")
                        for hour, kwh in matches:
                            results.append({
                                'plant_id': plant_id,
                                'timestamp': f"{date_str} {hour.zfill(2)}:00:00",
                                'generation_kwh': safe_float(kwh),
                                'current_kw': 0
                            })
        except Exception as e:
            # A failure on one day must not abort the remaining days.
            print(f" ✗ Error for {date_str}: {e}")
        current_date += timedelta(days=1)

    print(f"\n{'='*60}")
    print(f"[Total] Collected {len(results)} hourly records")
    print(f"{'='*60}\n")
    return results
def fetch_history_daily(plant_info, start_date, end_date):
    """
    Collect daily history for a CMSolar plant.

    Endpoint: ``/plant/sub/report_ok.php`` (responds with an HTML table).
    Query: ``mode=getPowers&type=month&device=total&start=YYYY-MM-01&money=``;
    one request is sent per calendar month touched by the range and rows
    outside ``[start_date, end_date]`` are discarded.

    Args:
        plant_info: dict; reads ``id``, ``name``, ``auth`` (``login_id``,
            ``login_pw``, ``site_no``) and ``system`` (``login_url``,
            ``api_url``).
        start_date: str, first day (YYYY-MM-DD), inclusive.
        end_date: str, last day (YYYY-MM-DD), inclusive.

    Returns:
        list: dicts with ``plant_id``, ``date`` (YYYY-MM-DD),
        ``generation_kwh`` and ``current_kw`` (always 0). Empty when login or
        site selection fails.

    Raises:
        ValueError: if ``start_date`` or ``end_date`` is not YYYY-MM-DD.
    """
    from datetime import datetime
    from dateutil.relativedelta import relativedelta

    results = []
    plant_id = plant_info.get('id', 'cmsolar-10')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '10호기')
    login_id = auth.get('login_id', '')
    login_pw = auth.get('login_pw', '')
    site_no = auth.get('site_no', '')
    login_url = system.get('login_url', '')

    # Actual data endpoint
    base_url = system.get('api_url', 'http://www.cmsolar2.kr')
    data_url = f"{base_url}/plant/sub/report_ok.php"

    # Same bound for every request so a stalled server cannot hang the crawl.
    request_timeout = 10

    tbody_re = re.compile(r'<tbody>(.*?)</tbody>', re.DOTALL)
    row_re = re.compile(r'<tr[^>]*>\s*<td>(\d+)</td>\s*<td>([\d.,]+)</td>')

    session = create_session()
    print(f"\n{'='*60}")
    print(f"[CMSolar Daily] {plant_name} ({start_date} ~ {end_date})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }
    login_data = {
        'login_id': login_id,
        'login_pw': login_pw,
        'site_no': site_no
    }

    try:
        res = session.post(login_url, data=login_data, headers=headers,
                           timeout=request_timeout)
        if res.status_code == 200:
            print(" ✓ Login successful")
        else:
            print(" ✗ Login failed")
            return results
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    # Site selection (required)
    try:
        change_url = f"{base_url}/change.php?site={site_no}"
        session.get(change_url, headers=headers, timeout=request_timeout)
        print(" ✓ Site selected")
    except Exception as e:
        print(f" ✗ Site selection error: {e}")
        return results

    # Iterate month by month (type=month returns one month of daily rows)
    current_date = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')
    while current_date <= end_dt:
        month_start = current_date.strftime('%Y-%m-01')
        month_label = month_start[:7]
        year = current_date.year
        month = current_date.month
        params = {
            'mode': 'getPowers',
            'type': 'month',
            'device': 'total',
            'start': month_start,
            'money': ''
        }
        try:
            res = session.get(data_url, params=params, headers=headers,
                              timeout=request_timeout)
            res.encoding = 'utf-8'
            if res.status_code != 200:
                print(f" ✗ HTTP {res.status_code} for {month_label}")
            else:
                tbody_match = tbody_re.search(res.text)
                if tbody_match is None:
                    print(f" ⚠ No tbody found for {month_label}")
                else:
                    # Each row looks like <tr class="odd"><td>1</td><td>136.00</td>...
                    matches = row_re.findall(tbody_match.group(1))
                    if not matches:
                        # Previously an empty table produced no output at all.
                        print(f" ⚠ No data for {month_label}")
                    else:
                        print(f" ✓ Found {len(matches)} daily records for {month_label}")
                        for day, kwh in matches:
                            # safe_float strips thousands separators itself.
                            generation_kwh = safe_float(kwh)
                            date_str = f"{year:04d}-{month:02d}-{int(day):02d}"
                            # ISO dates compare correctly as plain strings.
                            if start_date <= date_str <= end_date:
                                results.append({
                                    'plant_id': plant_id,
                                    'date': date_str,
                                    'generation_kwh': generation_kwh,
                                    'current_kw': 0
                                })
                                print(f"{date_str}: {generation_kwh:.2f}kWh")
        except Exception as e:
            # A failure on one month must not abort the remaining months.
            print(f" ✗ Error for {month_label}: {e}")

        # Advance to the first day of the next month
        current_date = (current_date.replace(day=1) + relativedelta(months=1))

    print(f"[Total] Collected {len(results)} daily records\n")
    return results
def fetch_history_monthly(plant_info, start_month, end_month):
    """
    Collect monthly history for a CMSolar plant.

    Endpoint: ``/plant/sub/report_ok.php`` (responds with an HTML table).
    Query: ``mode=getPowers&type=year&device=total&start=YYYY-01-01&money=``;
    one request is sent per calendar year touched by the range and rows
    outside the requested months are discarded.

    Args:
        plant_info: dict; reads ``id``, ``name``, ``start_date`` (plant
            commissioning date, default '2020-08-31'), ``auth`` (``login_id``,
            ``login_pw``, ``site_no``) and ``system`` (``login_url``,
            ``api_url``).
        start_month: str, first month (YYYY-MM), inclusive. Clamped forward to
            the month of ``start_date`` when earlier.
        end_month: str, last month (YYYY-MM), inclusive.

    Returns:
        list: dicts with ``plant_id``, ``month`` (YYYY-MM) and
        ``generation_kwh``. Empty when login or site selection fails.

    Raises:
        ValueError: if the effective start month or ``end_month`` is not
            YYYY-MM.
    """
    from datetime import datetime

    results = []
    plant_id = plant_info.get('id', 'cmsolar-10')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '10호기')

    # Commissioning date; `or` also covers an explicit None/empty value, which
    # would otherwise fail on the slice below.
    plant_start_date = plant_info.get('start_date') or '2020-08-31'
    plant_start_month = plant_start_date[:7]  # YYYY-MM

    # Never request months before the plant went into operation
    if start_month < plant_start_month:
        actual_start = plant_start_month
        print(f" 발전소 가동일({plant_start_date}) 이후부터 수집: {actual_start}")
    else:
        actual_start = start_month

    login_id = auth.get('login_id', '')
    login_pw = auth.get('login_pw', '')
    site_no = auth.get('site_no', '')
    login_url = system.get('login_url', '')

    # Actual data endpoint
    base_url = system.get('api_url', 'http://www.cmsolar2.kr')
    data_url = f"{base_url}/plant/sub/report_ok.php"

    # Same bound for every request so a stalled server cannot hang the crawl.
    request_timeout = 10

    tbody_re = re.compile(r'<tbody>(.*?)</tbody>', re.DOTALL)
    row_re = re.compile(r'<tr[^>]*>\s*<td>(\d+)</td>\s*<td>([\d.,]+)</td>')

    session = create_session()
    print(f"\n{'='*60}")
    print(f"[CMSolar Monthly] {plant_name} ({actual_start} ~ {end_month})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }
    login_data = {
        'login_id': login_id,
        'login_pw': login_pw,
        'site_no': site_no
    }

    try:
        res = session.post(login_url, data=login_data, headers=headers,
                           timeout=request_timeout)
        if res.status_code == 200:
            print(" ✓ Login successful")
        else:
            print(" ✗ Login failed")
            return results
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    # Site selection (required)
    try:
        change_url = f"{base_url}/change.php?site={site_no}"
        session.get(change_url, headers=headers, timeout=request_timeout)
        print(" ✓ Site selected")
    except Exception as e:
        print(f" ✗ Site selection error: {e}")
        return results

    # Iterate year by year (type=year returns one year of monthly rows).
    # The loop always jumped straight to January of the next year, so each
    # year is visited exactly once; a plain year range expresses that directly.
    start_dt = datetime.strptime(actual_start, '%Y-%m')
    end_month_dt = datetime.strptime(end_month, '%Y-%m')
    if start_dt <= end_month_dt:
        years = range(start_dt.year, end_month_dt.year + 1)
    else:
        years = range(0)

    for year in years:
        params = {
            'mode': 'getPowers',
            'type': 'year',
            'device': 'total',
            'start': f"{year}-01-01",
            'money': ''
        }
        try:
            res = session.get(data_url, params=params, headers=headers,
                              timeout=request_timeout)
            res.encoding = 'utf-8'
            if res.status_code != 200:
                print(f" ✗ HTTP {res.status_code} for year {year}")
                continue

            tbody_match = tbody_re.search(res.text)
            if tbody_match is None:
                print(f" ⚠ No tbody found for year {year}")
                continue

            # Each row looks like <tr class="even"><td>1</td><td>2,836.00</td>...
            year_count = 0
            for month, kwh in row_re.findall(tbody_match.group(1)):
                # safe_float strips thousands separators itself.
                generation_kwh = safe_float(kwh)
                month_str = f"{year:04d}-{int(month):02d}"
                # YYYY-MM strings compare correctly as plain strings.
                if actual_start <= month_str <= end_month:
                    results.append({
                        'plant_id': plant_id,
                        'month': month_str,
                        'generation_kwh': generation_kwh
                    })
                    print(f"{month_str}: {generation_kwh:.1f}kWh")
                    year_count += 1
            if year_count > 0:
                print(f" → Collected {year_count} months from {year}")
        except Exception as e:
            # A failure on one year must not abort the remaining years.
            print(f" ✗ Error for year {year}: {e}")

    print(f"[Total] Collected {len(results)} monthly records\n")
    return results

319
crawlers/cmsolar_old.py Normal file
View File

@ -0,0 +1,319 @@
# ==========================================
# crawlers/cmsolar_old.py - CMSolar 크롤러 (10호기, 구버전)
# ==========================================
import requests
from .base import create_session
def fetch_data(plant_info):
    """
    Collect the current reading of a CMSolar plant (older implementation).

    Logs in via ``{base_url}/login_ok.php``, selects the site, then reads the
    JSON endpoint ``{base_url}/plant/sub/idx_ok.php?mode=getPlant``.

    Args:
        plant_info: dict; reads ``id``, ``name``, ``company_name``, ``auth``
            (``login_id``, ``login_pw``, ``site_no``) and ``system``
            (``base_url``).

    Returns:
        list: ``[{'id', 'name', 'kw', 'today', 'status'}]`` with ``kw`` and
        ``today`` rounded to 2 decimals, or ``[]`` on any failure.
    """
    plant_id = plant_info.get('id', 'cmsolar-10')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    company_name = plant_info.get('company_name', '태양과바람')
    plant_name = plant_info.get('name', '10호기')
    login_id = auth.get('login_id', '')
    login_pw = auth.get('login_pw', '')
    site_no = auth.get('site_no', '')
    base_url = system.get('base_url', '')

    # requests waits indefinitely without a timeout; bound every call.
    request_timeout = 10

    session = create_session()
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/143.0.0.0 Safari/537.36',
        'Referer': f'{base_url}/plant/index.php'
    }

    # 1. Login. `except Exception` (not a bare except) so Ctrl-C / SystemExit
    #    still propagate, and the cause is reported instead of dropped.
    try:
        login_data = {'id': login_id, 'pw': login_pw, 'commit': 'Login'}
        session.post(f"{base_url}/login_ok.php", data=login_data, headers=headers,
                     timeout=request_timeout)
    except Exception as e:
        print(f"{plant_name} 접속 에러: {e}")
        return []

    # 2. Site selection
    try:
        session.get(f"{base_url}/change.php?site={site_no}", headers=headers,
                    timeout=request_timeout)
    except Exception as e:
        print(f"{plant_name} 접속 에러: {e}")
        return []

    # 3. Data request
    target_url = f"{base_url}/plant/sub/idx_ok.php?mode=getPlant"
    try:
        res = session.get(target_url, headers=headers, timeout=request_timeout)
        res.encoding = 'utf-8'
        data = res.json()
        plant_data = data[0]['plant']

        # Unit conversion (W -> kW, Wh -> kWh): values are divided by 1000
        curr_kw = float(plant_data.get('now', 0)) / 1000
        today_kwh = float(plant_data.get('today', 0)) / 1000
        is_error = int(plant_data.get('inv_error', 0))
        status = "🟢 정상" if is_error == 0 else "🔴 점검/고장"

        print(f" [CMSolar] {plant_name} 수집 완료: {round(curr_kw, 2)} kW")
        return [{
            'id': plant_id,
            'name': f'{company_name} {plant_name}',
            'kw': round(curr_kw, 2),
            'today': round(today_kwh, 2),
            'status': status
        }]
    except Exception as e:
        print(f"{plant_name} 에러: {e}")
        return []
def fetch_history_daily(plant_info, start_date, end_date):
    """
    Collect daily history for a CMSolar plant (older implementation).

    Sends one request per day to ``{base_url}/plant/sub/daily_data.php``.
    NOTE(review): the original comment marks this endpoint as a guess, so it
    may not exist on the real server.

    Args:
        plant_info: dict; reads ``id``, ``name``, ``auth`` (``login_id``,
            ``login_pw``, ``site_no``) and ``system`` (``base_url``).
        start_date: str, first day (YYYY-MM-DD), inclusive.
        end_date: str, last day (YYYY-MM-DD), inclusive.

    Returns:
        list: dicts with ``plant_id``, ``date`` and ``generation_kwh`` (the
        response's ``today``/``daily`` value divided by 1000).

    Raises:
        ValueError: if ``start_date`` or ``end_date`` is not YYYY-MM-DD.
    """
    from datetime import datetime, timedelta
    from .base import safe_float

    results = []
    plant_id = plant_info.get('id', 'cmsolar-10')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '10호기')
    login_id = auth.get('login_id', '')
    login_pw = auth.get('login_pw', '')
    site_no = auth.get('site_no', '')
    base_url = system.get('base_url', '')

    # Same bound for every request so a stalled server cannot hang the crawl.
    request_timeout = 10

    session = create_session()
    print(f"\n{'='*60}")
    print(f"[CMSolar Daily] {plant_name} ({start_date} ~ {end_date})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Referer': f'{base_url}/plant/index.php'
    }

    try:
        login_data = {'id': login_id, 'pw': login_pw, 'commit': 'Login'}
        session.post(f"{base_url}/login_ok.php", data=login_data, headers=headers,
                     timeout=request_timeout)
        session.get(f"{base_url}/change.php?site={site_no}", headers=headers,
                    timeout=request_timeout)
        print(" ✓ Login successful")
    except Exception as e:
        print(f" ✗ Login failed: {e}")
        return results

    current_date = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')
    while current_date <= end_dt:
        date_str = current_date.strftime('%Y-%m-%d')
        # Daily data endpoint (guessed)
        daily_url = f"{base_url}/plant/sub/daily_data.php?date={date_str}"
        try:
            res = session.get(daily_url, headers=headers, timeout=request_timeout)
            res.encoding = 'utf-8'
            if res.status_code == 200:
                data = res.json()
                daily_kwh = safe_float(data.get('today', data.get('daily', 0))) / 1000.0
                results.append({
                    'plant_id': plant_id,
                    'date': date_str,
                    'generation_kwh': daily_kwh
                })
                print(f"{date_str}: {daily_kwh}kWh")
            else:
                # Previously a non-200 response was dropped without a trace.
                print(f" ✗ HTTP {res.status_code} for {date_str}")
        except Exception as e:
            # A failure on one day must not abort the remaining days.
            print(f"{date_str}: {e}")
        current_date += timedelta(days=1)

    print(f"[Total] Collected {len(results)} daily records\n")
    return results
def fetch_history_monthly(plant_info, start_month, end_month):
    """
    Collect monthly history for a CMSolar plant (older implementation).

    Sends one request per month to ``{base_url}/plant/sub/monthly_data.php``.
    NOTE(review): the original comment marks this endpoint as a guess, so it
    may not exist on the real server.

    Args:
        plant_info: dict; reads ``id``, ``name``, ``auth`` (``login_id``,
            ``login_pw``, ``site_no``) and ``system`` (``base_url``).
        start_month: str, first month (YYYY-MM), inclusive.
        end_month: str, last month (YYYY-MM), inclusive.

    Returns:
        list: dicts with ``plant_id``, ``month`` and ``generation_kwh`` (the
        response's ``month``/``monthly`` value divided by 1000).

    Raises:
        ValueError: if ``start_month`` or ``end_month`` is not YYYY-MM.
    """
    from datetime import datetime
    from dateutil.relativedelta import relativedelta
    from .base import safe_float

    results = []
    plant_id = plant_info.get('id', 'cmsolar-10')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '10호기')
    login_id = auth.get('login_id', '')
    login_pw = auth.get('login_pw', '')
    site_no = auth.get('site_no', '')
    base_url = system.get('base_url', '')

    # Same bound for every request so a stalled server cannot hang the crawl.
    request_timeout = 10

    session = create_session()
    print(f"\n{'='*60}")
    print(f"[CMSolar Monthly] {plant_name} ({start_month} ~ {end_month})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Referer': f'{base_url}/plant/index.php'
    }

    try:
        login_data = {'id': login_id, 'pw': login_pw, 'commit': 'Login'}
        session.post(f"{base_url}/login_ok.php", data=login_data, headers=headers,
                     timeout=request_timeout)
        session.get(f"{base_url}/change.php?site={site_no}", headers=headers,
                    timeout=request_timeout)
        print(" ✓ Login successful")
    except Exception as e:
        print(f" ✗ Login failed: {e}")
        return results

    current_month = datetime.strptime(start_month, '%Y-%m')
    end_month_dt = datetime.strptime(end_month, '%Y-%m')
    while current_month <= end_month_dt:
        month_str = current_month.strftime('%Y-%m')
        # Monthly data endpoint (guessed)
        monthly_url = f"{base_url}/plant/sub/monthly_data.php?month={month_str}"
        try:
            res = session.get(monthly_url, headers=headers, timeout=request_timeout)
            res.encoding = 'utf-8'
            if res.status_code == 200:
                data = res.json()
                monthly_kwh = safe_float(data.get('month', data.get('monthly', 0))) / 1000.0
                results.append({
                    'plant_id': plant_id,
                    'month': month_str,
                    'generation_kwh': monthly_kwh
                })
                print(f"{month_str}: {monthly_kwh}kWh")
            else:
                # Previously a non-200 response was dropped without a trace.
                print(f" ✗ HTTP {res.status_code} for {month_str}")
        except Exception as e:
            # A failure on one month must not abort the remaining months.
            print(f"{month_str}: {e}")
        current_month += relativedelta(months=1)

    print(f"[Total] Collected {len(results)} monthly records\n")
    return results
def fetch_history_hourly(plant_info, start_date, end_date):
    """
    Collect hourly history for a CMSolar plant (older implementation).

    Sends one request per day to ``{base_url}/plant/sub/hourly_data.php``.
    NOTE(review): the original comment marks this endpoint as a guess, so it
    may not exist on the real server.

    Args:
        plant_info: dict, plant settings; reads ``id``, ``name``, ``auth``
            (``login_id``, ``login_pw``, ``site_no``) and ``system``
            (``base_url``).
        start_date: str, first day (YYYY-MM-DD), inclusive.
        end_date: str, last day (YYYY-MM-DD), inclusive.

    Returns:
        list: dicts with ``plant_id``, ``timestamp``
        ("YYYY-MM-DD HH:00:00"), ``generation_kwh`` and ``current_kw``.

    Raises:
        ValueError: if ``start_date`` or ``end_date`` is not YYYY-MM-DD.
    """
    from datetime import datetime, timedelta
    from .base import safe_float

    results = []

    # Extract settings
    plant_id = plant_info.get('id', 'cmsolar-10')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '10호기')
    login_id = auth.get('login_id', '')
    login_pw = auth.get('login_pw', '')
    site_no = auth.get('site_no', '')
    base_url = system.get('base_url', '')

    # Same bound for every request so a stalled server cannot hang the crawl.
    request_timeout = 10

    session = create_session()
    print(f"\n{'='*60}")
    print(f"[CMSolar History] {plant_name} ({start_date} ~ {end_date})")
    print(f"{'='*60}")

    # Login
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/143.0.0.0 Safari/537.36',
        'Referer': f'{base_url}/plant/index.php'
    }
    try:
        login_data = {'id': login_id, 'pw': login_pw, 'commit': 'Login'}
        session.post(f"{base_url}/login_ok.php", data=login_data, headers=headers,
                     timeout=request_timeout)
        # Site selection
        session.get(f"{base_url}/change.php?site={site_no}", headers=headers,
                    timeout=request_timeout)
        print(" ✓ Login successful")
    except Exception as e:
        print(f" ✗ Login failed: {e}")
        return results

    # Iterate over each day in the range
    current_date = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')
    while current_date <= end_dt:
        date_str = current_date.strftime('%Y-%m-%d')
        print(f"\n[Processing Date] {date_str}")

        # Hourly data endpoint (guessed)
        hourly_url = f"{base_url}/plant/sub/hourly_data.php?site={site_no}&date={date_str}"
        try:
            res = session.get(hourly_url, headers=headers, timeout=request_timeout)
            res.encoding = 'utf-8'
            if res.status_code != 200:
                print(f" ✗ HTTP {res.status_code}")
            else:
                data = res.json()
                # Accept either a bare list or {"hourly": [...]}
                hourly_data = data if isinstance(data, list) else data.get('hourly', [])
                if not hourly_data:
                    print(f" ⚠ No hourly data for {date_str}")
                else:
                    print(f" ✓ Found {len(hourly_data)} hourly records")
                    for item in hourly_data:
                        hour = str(item.get('hour', item.get('time', '00'))).zfill(2)
                        generation_wh = safe_float(item.get('energy', item.get('now', 0)))
                        # NOTE(review): unit heuristic - values above 1000 are
                        # treated as Wh and scaled, smaller ones kept as-is.
                        generation_kwh = generation_wh / 1000.0 if generation_wh > 1000 else generation_wh
                        current_kw = safe_float(item.get('power', 0)) / 1000.0
                        results.append({
                            'plant_id': plant_id,
                            'timestamp': f"{date_str} {hour}:00:00",
                            'generation_kwh': generation_kwh,
                            'current_kw': current_kw
                        })
        except Exception as e:
            # A failure on one day must not abort the remaining days.
            print(f" ✗ Error: {e}")

        # Next day
        current_date += timedelta(days=1)

    print(f"\n{'='*60}")
    print(f"[Total] Collected {len(results)} hourly records")
    print(f"{'='*60}\n")
    return results

427
crawlers/cmsolar_old2.py Normal file
View File

@ -0,0 +1,427 @@
# ==========================================
# crawlers/cmsolar_old2.py - CMSolar 크롤러 (10호기, 구버전 2)
# ==========================================
import requests
from .base import create_session, safe_float
def fetch_data(plant_info):
    """
    Collect the current reading of a CMSolar plant (second older version).

    Logs in at ``system['login_url']`` and reads JSON from
    ``system['data_url']``.

    Args:
        plant_info: dict; reads ``id``, ``name``, ``company_name``, ``auth``
            (``payload_id``, ``payload_pw``, ``site_no``) and ``system``
            (``login_url``, ``data_url``).

    Returns:
        list: ``[{'id', 'name', 'kw', 'today', 'status'}]`` on success, or
        ``[]`` on any failure. Values are taken from the response as-is (no
        unit scaling is applied here).
    """
    plant_id = plant_info.get('id', 'cmsolar-10')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    company_name = plant_info.get('company_name', '함안햇빛발전소')
    plant_name = plant_info.get('name', '10호기')
    login_id = auth.get('payload_id', '')
    login_pw = auth.get('payload_pw', '')
    site_no = auth.get('site_no', '')
    login_url = system.get('login_url', '')
    data_url = system.get('data_url', '')

    # requests waits indefinitely without a timeout; bound every call.
    request_timeout = 10

    session = create_session()
    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded'
    }

    # Login
    login_data = {
        'login_id': login_id,
        'login_pw': login_pw,
        'site_no': site_no
    }
    try:
        res = session.post(login_url, data=login_data, headers=headers,
                           timeout=request_timeout)
        if res.status_code != 200:
            return []
    except Exception as e:
        print(f"{plant_name} 접속 에러: {e}")
        return []

    # Data request
    try:
        res = session.get(data_url, headers=headers, timeout=request_timeout)
        if res.status_code != 200:
            return []
        data = res.json()
        # Field names are tried in order; the second is the fallback.
        curr_kw = safe_float(data.get('current', data.get('power', 0)))
        today_kwh = safe_float(data.get('today', data.get('generation', 0)))
        status = "🟢 정상" if curr_kw > 0 else "💤 대기"
        return [{
            'id': plant_id,
            'name': f'{company_name} {plant_name}',
            'kw': curr_kw,
            'today': today_kwh,
            'status': status
        }]
    except Exception as e:
        print(f"{plant_name} 에러: {e}")
        return []
def fetch_history_hourly(plant_info, start_date, end_date):
    """
    Collect hourly history for a CMSolar plant (second older version).

    Endpoint: ``/plant/sub/report_ok.php``, parsed here as JSON.
    Query: ``mode=getPowers&type=daily&device=total&start=YYYY-MM-DD&money=``;
    one request is sent per day in the range.

    Args:
        plant_info: dict; reads ``id``, ``name``, ``auth`` (``payload_id``,
            ``payload_pw``, ``site_no``) and ``system`` (``login_url``,
            ``api_url``).
        start_date: str, first day (YYYY-MM-DD), inclusive.
        end_date: str, last day (YYYY-MM-DD), inclusive.

    Returns:
        list: dicts with ``plant_id``, ``timestamp``
        ("YYYY-MM-DD HH:00:00"), ``generation_kwh`` and ``current_kw``.
        Empty when login or site selection fails.

    Raises:
        ValueError: if ``start_date`` or ``end_date`` is not YYYY-MM-DD.
    """
    from datetime import datetime, timedelta

    results = []
    plant_id = plant_info.get('id', 'cmsolar-10')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '10호기')
    login_id = auth.get('payload_id', '')
    login_pw = auth.get('payload_pw', '')
    site_no = auth.get('site_no', '')
    login_url = system.get('login_url', '')

    # Actual data endpoint
    base_url = system.get('api_url', 'http://www.cmsolar2.kr')
    data_url = f"{base_url}/plant/sub/report_ok.php"

    # Same bound for every request so a stalled server cannot hang the crawl.
    request_timeout = 10

    session = create_session()
    print(f"\n{'='*60}")
    print(f"[CMSolar History] {plant_name} ({start_date} ~ {end_date})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }
    login_data = {
        'login_id': login_id,
        'login_pw': login_pw,
        'site_no': site_no
    }

    try:
        res = session.post(login_url, data=login_data, headers=headers,
                           timeout=request_timeout)
        if res.status_code == 200:
            print(" ✓ Login successful")
        else:
            print(" ✗ Login failed")
            return results
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    # Site selection (required)
    try:
        change_url = f"{base_url}/change.php?site={site_no}"
        session.get(change_url, headers=headers, timeout=request_timeout)
        print(" ✓ Site selected")
    except Exception as e:
        print(f" ✗ Site selection error: {e}")
        return results

    # Iterate over each day in the range
    current_date = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')
    while current_date <= end_dt:
        date_str = current_date.strftime('%Y-%m-%d')
        # type=daily returns one day's worth of hourly records
        params = {
            'mode': 'getPowers',
            'type': 'daily',
            'device': 'total',
            'start': date_str,
            'money': ''
        }
        try:
            res = session.get(data_url, params=params, headers=headers,
                              timeout=request_timeout)
            if res.status_code != 200:
                print(f" ✗ HTTP {res.status_code} for {date_str}")
            else:
                data = res.json()
                # The record list may sit under any of these keys; take the
                # first non-empty one.
                hourly_data = data.get('data', []) or data.get('list', []) or data.get('powers', [])
                if isinstance(hourly_data, list) and len(hourly_data) > 0:
                    print(f" ✓ Found {len(hourly_data)} hourly records for {date_str}")
                    for item in hourly_data:
                        hour = str(item.get('hour', item.get('time', '00'))).zfill(2)
                        generation_kwh = safe_float(item.get('power', item.get('generation', item.get('kwh', 0))))
                        current_kw = safe_float(item.get('kw', 0))
                        results.append({
                            'plant_id': plant_id,
                            'timestamp': f"{date_str} {hour}:00:00",
                            'generation_kwh': generation_kwh,
                            'current_kw': current_kw
                        })
                else:
                    print(f" ⚠ No data for {date_str}")
        except Exception as e:
            # A failure on one day must not abort the remaining days.
            print(f" ✗ Error: {e}")
        current_date += timedelta(days=1)

    print(f"\n{'='*60}")
    print(f"[Total] Collected {len(results)} hourly records")
    print(f"{'='*60}\n")
    return results
def fetch_history_daily(plant_info, start_date, end_date):
    """
    Collect daily history for a CMSolar plant (second older version).

    Endpoint: ``/plant/sub/report_ok.php``, parsed here as JSON.
    Query: ``mode=getPowers&type=month&device=total&start=YYYY-MM-01&money=``;
    one request is sent per calendar month touched by the range and records
    outside ``[start_date, end_date]`` are discarded.

    Args:
        plant_info: dict; reads ``id``, ``name``, ``auth`` (``payload_id``,
            ``payload_pw``, ``site_no``) and ``system`` (``login_url``,
            ``api_url``).
        start_date: str, first day (YYYY-MM-DD), inclusive.
        end_date: str, last day (YYYY-MM-DD), inclusive.

    Returns:
        list: dicts with ``plant_id``, ``date``, ``generation_kwh`` and
        ``current_kw``. Empty when login or site selection fails.

    Raises:
        ValueError: if ``start_date`` or ``end_date`` is not YYYY-MM-DD.
    """
    from datetime import datetime
    from dateutil.relativedelta import relativedelta

    results = []
    plant_id = plant_info.get('id', 'cmsolar-10')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '10호기')
    login_id = auth.get('payload_id', '')
    login_pw = auth.get('payload_pw', '')
    site_no = auth.get('site_no', '')
    login_url = system.get('login_url', '')

    # Actual data endpoint
    base_url = system.get('api_url', 'http://www.cmsolar2.kr')
    data_url = f"{base_url}/plant/sub/report_ok.php"

    # Same bound for every request so a stalled server cannot hang the crawl.
    request_timeout = 10

    session = create_session()
    print(f"\n{'='*60}")
    print(f"[CMSolar Daily] {plant_name} ({start_date} ~ {end_date})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }
    login_data = {
        'login_id': login_id,
        'login_pw': login_pw,
        'site_no': site_no
    }

    try:
        res = session.post(login_url, data=login_data, headers=headers,
                           timeout=request_timeout)
        if res.status_code == 200:
            print(" ✓ Login successful")
        else:
            print(" ✗ Login failed")
            return results
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    # Site selection (required)
    try:
        change_url = f"{base_url}/change.php?site={site_no}"
        session.get(change_url, headers=headers, timeout=request_timeout)
        print(" ✓ Site selected")
    except Exception as e:
        print(f" ✗ Site selection error: {e}")
        return results

    # Iterate month by month (type=month returns one month of daily records)
    current_date = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')
    while current_date <= end_dt:
        month_start = current_date.strftime('%Y-%m-01')
        params = {
            'mode': 'getPowers',
            'type': 'month',
            'device': 'total',
            'start': month_start,
            'money': ''
        }
        try:
            res = session.get(data_url, params=params, headers=headers,
                              timeout=request_timeout)
            if res.status_code != 200:
                print(f" ✗ HTTP {res.status_code} for {month_start[:7]}")
            else:
                data = res.json()
                # The record list may sit under any of these keys; take the
                # first non-empty one.
                daily_data = data.get('data', []) or data.get('list', []) or data.get('powers', [])
                if isinstance(daily_data, list) and len(daily_data) > 0:
                    print(f" ✓ Found {len(daily_data)} daily records for {month_start[:7]}")
                    for item in daily_data:
                        date_str = item.get('date', item.get('day', ''))
                        generation_kwh = safe_float(item.get('power', item.get('generation', item.get('kwh', 0))))
                        current_kw = safe_float(item.get('kw', 0))
                        # Range filter; assumes date_str is an ISO
                        # YYYY-MM-DD string - TODO confirm against the API.
                        if start_date <= date_str <= end_date:
                            results.append({
                                'plant_id': plant_id,
                                'date': date_str,
                                'generation_kwh': generation_kwh,
                                'current_kw': current_kw
                            })
                            print(f"{date_str}: {generation_kwh:.2f}kWh")
        except Exception as e:
            # A failure on one month must not abort the remaining months.
            print(f" ✗ Error: {e}")

        # Advance to the first day of the next month
        current_date = (current_date.replace(day=1) + relativedelta(months=1))

    print(f"[Total] Collected {len(results)} daily records\n")
    return results
def fetch_history_monthly(plant_info, start_month, end_month):
    """
    Collect monthly history for a CMSolar plant (second older version).

    Endpoint: ``/plant/sub/report_ok.php``, parsed here as JSON.
    Query: ``mode=getPowers&type=year&device=total&start=YYYY-01-01&money=``;
    one request is sent per calendar year touched by the range and records
    outside ``[start_month, end_month]`` are discarded.

    Args:
        plant_info: dict; reads ``id``, ``name``, ``auth`` (``payload_id``,
            ``payload_pw``, ``site_no``) and ``system`` (``login_url``,
            ``api_url``).
        start_month: str, first month (YYYY-MM), inclusive.
        end_month: str, last month (YYYY-MM), inclusive.

    Returns:
        list: dicts with ``plant_id``, ``month`` and ``generation_kwh``.
        Empty when login or site selection fails.

    Raises:
        ValueError: if ``start_month`` or ``end_month`` is not YYYY-MM.
    """
    from datetime import datetime

    results = []
    plant_id = plant_info.get('id', 'cmsolar-10')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '10호기')
    login_id = auth.get('payload_id', '')
    login_pw = auth.get('payload_pw', '')
    site_no = auth.get('site_no', '')
    login_url = system.get('login_url', '')

    # Actual data endpoint
    base_url = system.get('api_url', 'http://www.cmsolar2.kr')
    data_url = f"{base_url}/plant/sub/report_ok.php"

    # Same bound for every request so a stalled server cannot hang the crawl.
    request_timeout = 10

    session = create_session()
    print(f"\n{'='*60}")
    print(f"[CMSolar Monthly] {plant_name} ({start_month} ~ {end_month})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }
    login_data = {
        'login_id': login_id,
        'login_pw': login_pw,
        'site_no': site_no
    }

    try:
        res = session.post(login_url, data=login_data, headers=headers,
                           timeout=request_timeout)
        if res.status_code == 200:
            print(" ✓ Login successful")
        else:
            print(" ✗ Login failed")
            return results
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    # Site selection (required)
    try:
        change_url = f"{base_url}/change.php?site={site_no}"
        session.get(change_url, headers=headers, timeout=request_timeout)
        print(" ✓ Site selected")
    except Exception as e:
        print(f" ✗ Site selection error: {e}")
        return results

    # Iterate year by year (type=year returns one year of monthly records).
    # Stepping a month at a time while skipping already-seen years visited
    # each year in the range exactly once; a year range says that directly.
    start_dt = datetime.strptime(start_month + '-01', '%Y-%m-%d')
    end_dt = datetime.strptime(end_month + '-01', '%Y-%m-%d')
    if start_dt <= end_dt:
        years = range(start_dt.year, end_dt.year + 1)
    else:
        years = range(0)

    for year in years:
        params = {
            'mode': 'getPowers',
            'type': 'year',
            'device': 'total',
            'start': f"{year:04d}-01-01",
            'money': ''
        }
        try:
            res = session.get(data_url, params=params, headers=headers,
                              timeout=request_timeout)
            if res.status_code != 200:
                print(f" ✗ HTTP {res.status_code} for {year}")
                continue

            data = res.json()
            # The record list may sit under any of these keys; take the first
            # non-empty one.
            monthly_data = data.get('data', []) or data.get('list', []) or data.get('powers', [])
            if isinstance(monthly_data, list) and len(monthly_data) > 0:
                print(f" ✓ Found {len(monthly_data)} monthly records for {year}")
                for item in monthly_data:
                    month_str = item.get('month', item.get('date', ''))
                    generation_kwh = safe_float(item.get('power', item.get('generation', item.get('kwh', 0))))
                    # Normalise to YYYY-MM
                    if len(month_str) >= 7:
                        month_str = month_str[:7]
                    # YYYY-MM strings compare correctly as plain strings.
                    if start_month <= month_str <= end_month:
                        results.append({
                            'plant_id': plant_id,
                            'month': month_str,
                            'generation_kwh': generation_kwh
                        })
                        print(f"{month_str}: {generation_kwh:.1f}kWh")
        except Exception as e:
            # A failure on one year must not abort the remaining years.
            print(f" ✗ Error: {e}")

    print(f"[Total] Collected {len(results)} monthly records\n")
    return results

489
crawlers/hyundai.py Normal file
View File

@ -0,0 +1,489 @@
# ==========================================
# crawlers/hyundai.py - 현대 크롤러 (8호기)
# ==========================================
import requests
from .base import create_session
def fetch_data(plant_info):
    """
    Collect the current reading of a Hyundai plant (Hi-Smart 3.0).

    Tries three login URL variants (``login_path``, ``.json``, ``.do``) until
    one answers 200 with an ``x-auth-token`` response header, then requests
    ``{base_url}{data_path}`` with that token.

    Args:
        plant_info: dict; reads ``id``, ``name``, ``company_name``, ``auth``
            (``user_id``, ``password``, ``site_id``) and ``system``
            (``base_url``, ``login_path``, ``data_path``).

    Returns:
        list: ``[{'id', 'name', 'kw', 'today', 'status'}]`` on success, or
        ``[]`` when login, the request, or parsing fails.
    """
    plant_id = plant_info.get('id', 'hyundai-08')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    company_name = plant_info.get('company_name', '태양과바람')
    plant_name = plant_info.get('name', '8호기')
    user_id = auth.get('user_id', '')
    password = auth.get('password', '')
    site_id = auth.get('site_id', '')
    base_url = system.get('base_url', '')
    login_path = system.get('login_path', '')
    data_path = system.get('data_path', '')

    # requests waits indefinitely without a timeout; bound every call.
    request_timeout = 10

    session = create_session()
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
        'Content-Type': 'application/json;charset=UTF-8',
        'Accept': 'application/json, text/plain, */*',
        'Origin': base_url,
        'Referer': f'{base_url}/',
        'X-ApiVersion': 'v1.0',
        'X-App': 'HIWAY4VUETIFY',
        'X-CallType': '0',
        'X-Channel': 'WEB_PC',
        'X-Lang': 'ko',
        'X-Mid': 'login',
        'X-VName': 'UI'
    }

    # Login: try each candidate URL in turn
    login_urls = [
        f"{base_url}{login_path}",
        f"{base_url}{login_path}.json",
        f"{base_url}{login_path}.do"
    ]
    payload = {"user_id": user_id, "password": password}
    login_success = False
    for url in login_urls:
        try:
            res = session.post(url, json=payload, headers=headers,
                               timeout=request_timeout)
        except Exception:
            # Best-effort: a failing candidate just moves on to the next one.
            continue
        if res.status_code == 200:
            auth_token = res.headers.get('x-auth-token')
            if auth_token:
                headers['x-auth-token'] = auth_token
                print(f" [현대] 로그인 성공 & 토큰 확보!")
                login_success = True
                break

    if not login_success:
        print(f"❌ 현대 {plant_name} 로그인 실패")
        return []

    # Data request
    try:
        data_url = f"{base_url}{data_path}"
        params = {'site_id': site_id}

        # Header values switch from the login call to the data call
        headers['X-Channel'] = 'WEB_PCWeb'
        headers['X-Mid'] = 'siteWork'

        res = session.get(data_url, params=params, headers=headers,
                          timeout=request_timeout)
        if res.status_code != 200:
            print(f"❌ 현대 데이터 요청 실패 (코드: {res.status_code})")
            return []

        data = res.json()
        if 'datas' in data and 'unitedSiteInfo' in data['datas']:
            info = data['datas']['unitedSiteInfo']
            # str() first: the API value may be a number rather than a
            # comma-formatted string, and numbers have no .replace().
            curr_kw = float(str(info.get('PVPCS_Pac', '0')).replace(',', ''))
            today_kwh = float(str(info.get('PVPCS_Daily_P', '0')).replace(',', ''))
            print(f" [현대] {plant_name} 데이터: {curr_kw}kW / {today_kwh}kWh")
            return [{
                'id': plant_id,
                'name': f'{company_name} {plant_name}',
                'kw': curr_kw,
                'today': today_kwh,
                'status': "🟢 정상" if curr_kw > 0 else "💤 대기"
            }]
        else:
            print(f"⚠️ 현대 데이터 구조가 다릅니다.")
            return []
    except Exception as e:
        print(f"❌ 현대 파싱 에러: {e}")
        return []
def fetch_history_hourly(plant_info, start_date, end_date):
    """
    Collect intraday historical readings for a Hyundai plant.

    Logs in once, then calls the ``getSolraDayWork`` endpoint for every
    calendar day in ``[start_date, end_date]`` and pairs each ``runTime``
    label with the ``runData`` value at the same position.

    Args:
        plant_info: dict describing the plant, e.g. {
            'id': 'hyundai-08',
            'name': '8호기',
            'type': 'hyundai',
            'auth': {'user_id': '...', 'password': '...', 'site_id': '...'},
            'system': {'base_url': '...', 'login_path': '...', 'data_path': '...'},
            'company_name': '태양과바람'
        }
        start_date: str, first day to fetch (YYYY-MM-DD)
        end_date: str, last day to fetch, inclusive (YYYY-MM-DD)

    Returns:
        list: one dict per sample, e.g. [{
            'plant_id': 'hyundai-08',
            'timestamp': '2026-01-15 14:20:00',
            'generation_kwh': 15.2,
            'current_kw': 15.2
        }, ...]. Empty when login fails.

    Raises:
        ValueError: if start_date or end_date is not YYYY-MM-DD (raised
            after a successful login, when the dates are parsed).
    """
    from datetime import datetime, timedelta
    from .base import safe_float

    results = []

    # Extract settings
    plant_id = plant_info.get('id', 'hyundai-08')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '8호기')
    user_id = auth.get('user_id', '')
    password = auth.get('password', '')
    site_id = auth.get('site_id', '')
    base_url = system.get('base_url', '')
    login_path = system.get('login_path', '')

    session = create_session()

    print(f"\n{'='*60}")
    print(f"[Hyundai History] {plant_name} ({start_date} ~ {end_date})")
    print(f"{'='*60}")

    # Login
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
        'Content-Type': 'application/json;charset=UTF-8',
        'Accept': 'application/json, text/plain, */*',
        'Origin': base_url,
        'Referer': f'{base_url}/',
        'X-ApiVersion': 'v1.0',
        'X-App': 'HIWAY4VUETIFY',
        'X-CallType': '0',
        'X-Channel': 'WEB_PC',
        'X-Lang': 'ko',
        'X-Mid': 'login',
        'X-VName': 'UI'
    }
    # Candidate login URLs are tried in order; the first response that
    # carries an x-auth-token header wins.
    login_urls = [
        f"{base_url}{login_path}",
        f"{base_url}{login_path}.json",
        f"{base_url}{login_path}.do"
    ]
    payload = {"user_id": user_id, "password": password}
    login_success = False
    for login_url in login_urls:
        try:
            # Bounded wait (same limit as the data requests below) so an
            # unresponsive server cannot block the crawler indefinitely.
            res = session.post(login_url, json=payload, headers=headers, timeout=10)
        except Exception:
            # Best effort: a failing candidate just means trying the next one.
            continue
        if res.status_code != 200:
            continue
        auth_token = res.headers.get('x-auth-token')
        if auth_token:
            headers['x-auth-token'] = auth_token
            print(" ✓ Login successful")
            login_success = True
            break

    if not login_success:
        print(" ✗ Login failed")
        return results

    # Iterate over the date range
    current_date = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')
    headers['X-Mid'] = 'siteWork'

    # getSolraDayWork endpoint (20-minute interval data)
    day_work_url = f"{base_url}/hismart/site/getSolraDayWork"

    while current_date <= end_dt:
        date_str = current_date.strftime('%Y-%m-%d')
        print(f"\n[Processing Date] {date_str}")
        params = {
            'site_id': site_id,
            'startDate': date_str  # YYYY-MM-DD format
        }
        try:
            res = session.get(day_work_url, params=params, headers=headers, timeout=10)
            if res.status_code == 200:
                data = res.json()
                # Parse the solraDayWork structure
                day_work = data.get('datas', {}).get('solraDayWork', {})
                run_data = day_work.get('runData', [])
                run_time = day_work.get('runTime', [])
                # Only trust the payload when both arrays are present and aligned.
                if run_data and run_time and len(run_data) == len(run_time):
                    print(f" ✓ Found {len(run_data)} records (20-min intervals)")
                    # run_time entries look like "14:20"; run_data holds kW values.
                    for time_str, raw_value in zip(run_time, run_data):
                        generation_kw = safe_float(raw_value)
                        results.append({
                            'plant_id': plant_id,
                            'timestamp': f"{date_str} {time_str}:00",
                            # NOTE: this is the instantaneous kW reading, stored
                            # as-is under the kWh key (no aggregation is done).
                            'generation_kwh': generation_kw,
                            'current_kw': generation_kw
                        })
                    print(f" → Collected {len(run_data)} records")
                else:
                    print(f" ⚠ No data for {date_str}")
            else:
                print(f" ✗ HTTP {res.status_code}")
        except Exception as e:
            # One bad day must not abort the rest of the range.
            print(f" ✗ Error: {e}")
        # Move on to the next day
        current_date += timedelta(days=1)

    print(f"\n{'='*60}")
    print(f"[Total] Collected {len(results)} records")
    print(f"{'='*60}\n")
    return results
def fetch_history_daily(plant_info, start_date, end_date):
    """
    Collect daily generation history for a Hyundai plant, one request per month.

    Uses the ``getSolraMonthWork`` endpoint, which returns a whole month of
    daily values at once. ``runData[i]`` is treated as the value for day
    ``i + 1`` of the requested month.

    Args:
        plant_info: dict with 'id', 'name', 'auth' (user_id, password,
            site_id) and 'system' (base_url, login_path).
        start_date: str, first day to keep (YYYY-MM-DD)
        end_date: str, last day to keep, inclusive (YYYY-MM-DD)

    Returns:
        list: [{'plant_id': '...', 'date': 'YYYY-MM-DD',
                'generation_kwh': 123.45}, ...]; empty when login fails.

    Raises:
        ValueError: if the first 7 characters of start_date/end_date are
            not a valid YYYY-MM (raised after a successful login).
    """
    from datetime import datetime
    from dateutil.relativedelta import relativedelta
    from .base import safe_float
    import calendar

    results = []
    plant_id = plant_info.get('id', 'hyundai-08')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '8호기')
    user_id = auth.get('user_id', '')
    password = auth.get('password', '')
    site_id = auth.get('site_id', '')
    base_url = system.get('base_url', '')
    login_path = system.get('login_path', '')

    session = create_session()

    print(f"\n{'='*60}")
    print(f"[Hyundai Daily] {plant_name} ({start_date} ~ {end_date}) - Looping by Month")
    print(f"{'='*60}")

    # Login
    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/json;charset=UTF-8',
        'X-ApiVersion': 'v1.0',
        'X-App': 'HIWAY4VUETIFY',
        'X-Channel': 'WEB_PC',
        'X-Lang': 'ko',
        'X-Mid': 'login',
        'X-VName': 'UI'
    }
    login_url = f"{base_url}{login_path}"
    payload = {"user_id": user_id, "password": password}
    try:
        # Bounded wait (same limit as the data requests below) so a stalled
        # login cannot hang the crawler.
        res = session.post(login_url, json=payload, headers=headers, timeout=10)
        auth_token = res.headers.get('x-auth-token')
        if not auth_token:
            print(" ✗ Login failed")
            return results
        headers['x-auth-token'] = auth_token
        headers['X-Mid'] = 'siteWork'
        print(" ✓ Login successful")
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    # Iterate month by month; parsing only YYYY-MM pins both bounds to day 1.
    current_month = datetime.strptime(start_date[:7], '%Y-%m')
    end_month_dt = datetime.strptime(end_date[:7], '%Y-%m')
    url = f"{base_url}/hismart/site/getSolraMonthWork"

    while current_month <= end_month_dt:
        month_str = current_month.strftime('%Y-%m')
        year = current_month.year
        month = current_month.month
        # Number of days in this month; entries beyond it (e.g. a
        # "February 30th" slot) are ignored.
        last_day = calendar.monthrange(year, month)[1]

        print(f" [Fetching] {month_str} ...", end="", flush=True)
        params = {'site_id': site_id, 'month': month_str}
        try:
            res = session.get(url, params=params, headers=headers, timeout=10)
            if res.status_code == 200:
                data = res.json()
                month_work = data.get('datas', {}).get('solraMonthWork', {})
                run_data = month_work.get('runData', [])
                if run_data:
                    count = 0
                    for day, val in enumerate(run_data, start=1):
                        if day > last_day:
                            break
                        date_str = f"{year}-{month:02d}-{day:02d}"
                        # Keep only days inside the requested range; ISO
                        # dates compare correctly as plain strings.
                        if start_date <= date_str <= end_date:
                            results.append({
                                'plant_id': plant_id,
                                'date': date_str,
                                'generation_kwh': round(safe_float(val), 2)
                            })
                            count += 1
                    print(f" OK ({count} days)")
                else:
                    print(" No data")
            else:
                print(f" HTTP {res.status_code}")
        except Exception as e:
            # One bad month must not abort the rest of the range.
            print(f" Error: {e}")
        current_month += relativedelta(months=1)

    print(f"\n[Total] Collected {len(results)} daily records\n")
    return results
def fetch_history_monthly(plant_info, start_month, end_month):
    """
    Collect monthly generation totals for a Hyundai plant.

    For each month in ``[start_month, end_month]`` the ``getSolraMonthWork``
    endpoint is queried and its ``runData`` array (daily generation values)
    is summed into one monthly figure.

    Args:
        plant_info: dict with 'id', 'name', 'auth' (user_id, password,
            site_id) and 'system' (base_url, login_path).
        start_month: str, first month (YYYY-MM)
        end_month: str, last month, inclusive (YYYY-MM)

    Returns:
        list: [{'plant_id': '...', 'month': '2026-01', 'generation_kwh': 12345.6}, ...]
        Empty when the login response carries no auth token.

    Raises:
        ValueError: if start_month or end_month is not YYYY-MM.
        Exception: whatever the session raises for the login request; the
            login call is not guarded here.
    """
    from datetime import datetime
    from dateutil.relativedelta import relativedelta
    from .base import safe_float

    results = []
    plant_id = plant_info.get('id', 'hyundai-08')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '8호기')
    user_id = auth.get('user_id', '')
    password = auth.get('password', '')
    site_id = auth.get('site_id', '')
    base_url = system.get('base_url', '')
    login_path = system.get('login_path', '')

    session = create_session()

    print(f"\n{'='*60}")
    print(f"[Hyundai Monthly] {plant_name} ({start_month} ~ {end_month})")
    print(f"{'='*60}")

    # Login
    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/json;charset=UTF-8',
        'X-ApiVersion': 'v1.0',
        'X-App': 'HIWAY4VUETIFY',
        'X-Channel': 'WEB_PC',
        'X-Lang': 'ko',
        'X-Mid': 'login',
        'X-VName': 'UI'
    }
    login_url = f"{base_url}{login_path}"
    payload = {"user_id": user_id, "password": password}
    # Bounded wait (same limit as the data requests below) so a stalled
    # login cannot hang the crawler.
    res = session.post(login_url, json=payload, headers=headers, timeout=10)
    auth_token = res.headers.get('x-auth-token')
    if not auth_token:
        print(" ✗ Login failed")
        return results
    headers['x-auth-token'] = auth_token
    headers['X-Mid'] = 'siteWork'
    print(" ✓ Login successful")

    current_month = datetime.strptime(start_month, '%Y-%m')
    end_month_dt = datetime.strptime(end_month, '%Y-%m')
    # Monthly endpoint: getSolraMonthWork
    url = f"{base_url}/hismart/site/getSolraMonthWork"

    while current_month <= end_month_dt:
        month_str = current_month.strftime('%Y-%m')
        try:
            params = {
                'site_id': site_id,
                'month': month_str  # YYYY-MM format
            }
            # NOTE(review): TLS verification is disabled for this request
            # only; fetch_history_daily calls the same endpoint without
            # verify=False — confirm whether it is still needed.
            res = session.get(url, params=params, headers=headers, verify=False, timeout=10)
            if res.status_code == 200:
                data = res.json()
                # Response shape: datas.solraMonthWork.runData = daily generation array
                if 'datas' in data and 'solraMonthWork' in data['datas']:
                    month_data = data['datas']['solraMonthWork']
                    run_data = month_data.get('runData', [])
                    # Convert every entry through safe_float before summing, as
                    # the daily crawler does for this same array; a raw sum()
                    # raises TypeError on a string/None entry and the whole
                    # month would be lost.
                    monthly_kwh = sum(safe_float(v) for v in run_data) if run_data else 0.0
                    print(f"{month_str}: {monthly_kwh:.1f}kWh (from {len(run_data)} days)")
                    results.append({
                        'plant_id': plant_id,
                        'month': month_str,
                        'generation_kwh': monthly_kwh
                    })
            else:
                # Report the failure instead of skipping the month silently.
                print(f"{month_str}: HTTP {res.status_code}")
        except Exception as e:
            # One bad month must not abort the rest of the range.
            print(f"{month_str}: {e}")
        current_month += relativedelta(months=1)

    print(f"[Total] Collected {len(results)} monthly records\n")
    return results

559
crawlers/kremc.py Normal file
View File

@ -0,0 +1,559 @@
# ==========================================
# crawlers/kremc.py - KREMC 크롤러 (5호기)
# ==========================================
import requests
import urllib.parse
from .base import safe_float, create_session
def fetch_data(plant_info):
    """
    Collect the current reading of a KREMC plant.

    Logs in, then queries the "latest" endpoint for the instantaneous output
    and the "energy" endpoint for today's generation.

    Args:
        plant_info: dict with 'id', 'name', 'company_name',
            'auth' (user_id, password) and
            'system' (login_url, api_base, enso_type).

    Returns:
        list: a single-element list
            [{'id', 'name', 'kw', 'today', 'status'}] on success, or an
            empty list when login fails or any unexpected error occurs.
            A failed "latest"/"energy" call leaves the matching value at 0.0.
    """
    # Extract settings
    plant_id = plant_info.get('id', 'kremc-05')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    company_name = plant_info.get('company_name', '태양과바람')
    plant_name = plant_info.get('name', '5호기')
    user_id = auth.get('user_id', '')
    password = auth.get('password', '')
    login_url = system.get('login_url', '')
    api_base = system.get('api_base', '')
    enso_type = system.get('enso_type', '15001')

    try:
        session = create_session()
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
            'Content-Type': 'application/json',
            'Accept': 'application/json, text/plain, */*',
            'Origin': 'https://kremc.kr',
            'Referer': 'https://kremc.kr/login'
        }

        # 1. Login
        login_data = {'userId': user_id, 'password': password}
        login_res = session.post(login_url, json=login_data, headers=headers, timeout=10)
        if login_res.status_code != 200:
            print(f" ⚠️ KREMC 로그인 실패: {login_res.status_code}")
            return []

        try:
            login_json = login_res.json()
            if login_json.get('status') == 200 or login_json.get('code') == 'S001':
                data = login_json.get('data')
                # The token is accepted either as the bare 'data' string or
                # under one of several keys of a 'data' object.
                if isinstance(data, str) and len(data) > 10:
                    token = data
                elif isinstance(data, dict):
                    token = data.get('token') or data.get('accessToken') or data.get('jwt')
                    if not token:
                        return []
                else:
                    return []
            else:
                print(f" ⚠️ KREMC 로그인 실패: {login_json.get('message', 'Unknown')}")
                return []
        except Exception:
            # Unparseable or oddly shaped login body: treat as a failed
            # login. `Exception` (not a bare except) lets Ctrl-C and
            # SystemExit propagate.
            return []

        print(f" [KREMC] 토큰 획득 성공")

        # 2. API headers
        api_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'application/json',
            'X-Auth-Token': token
        }
        installer_id_encoded = urllib.parse.quote(user_id)

        # 3. Instantaneous output (kW)
        latest_url = f"{api_base}/monitor/installer/gath/latest?installerId={installer_id_encoded}&ensoTypeCode={enso_type}"
        latest_res = session.get(latest_url, headers=api_headers, timeout=10)
        current_kw = 0.0
        if latest_res.status_code == 200:
            try:
                latest_data = latest_res.json()
                data = latest_data.get('data', {})
                if isinstance(data, dict):
                    # The raw value is divided by 1000 to obtain kW.
                    watts = safe_float(data.get('outpElcpFigr', 0))
                    current_kw = watts / 1000.0 if watts > 0 else 0.0
            except Exception:
                # Best effort: keep 0.0 when the body cannot be parsed.
                pass

        # 4. Today's generation (kWh)
        energy_url = f"{api_base}/monitor/installer/gath/energy?installerId={installer_id_encoded}&ensoTypeCode={enso_type}&cid="
        energy_res = session.get(energy_url, headers=api_headers, timeout=10)
        today_kwh = 0.0
        if energy_res.status_code == 200:
            try:
                energy_data = energy_res.json()
                data = energy_data.get('data', {})
                if isinstance(data, dict):
                    today_kwh = safe_float(data.get('dayEnergy', 0))
            except Exception:
                # Best effort: keep 0.0 when the body cannot be parsed.
                pass

        print(f" [KREMC] {plant_name} 데이터: {current_kw} kW / {today_kwh} kWh")
        return [{
            'id': plant_id,
            'name': f'{company_name} {plant_name}',
            'kw': current_kw,
            'today': today_kwh,
            'status': '🟢 정상' if current_kw > 0 else '💤 대기'
        }]
    except Exception as e:
        print(f" ❌ KREMC 오류: {e}")
        return []
def fetch_history_hourly(plant_info, start_date, end_date):
    """
    Collect hourly generation history for a KREMC plant.

    Logs in once, then queries ``stat/userbyuser/meainDataList`` with
    ``dateType=HH`` for each day in ``[start_date, end_date]``.

    Args:
        plant_info: dict with 'id', 'name', 'auth' (user_id, password),
            'system' (login_url, api_base, enso_type) and optional
            'options' (cid, cityProvCode, rgnCode, dongCode).
        start_date: str, first day (YYYY-MM-DD)
        end_date: str, last day, inclusive (YYYY-MM-DD)

    Returns:
        list: [{'plant_id', 'timestamp': 'YYYY-MM-DD HH:00:00',
                'generation_kwh', 'current_kw': 0}, ...]. Whatever was
        collected so far is returned when login fails or an error occurs.
    """
    from datetime import datetime, timedelta

    results = []

    # Extract settings
    plant_id = plant_info.get('id', 'kremc-05')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    options = plant_info.get('options', {})
    plant_name = plant_info.get('name', '5호기')
    user_id = auth.get('user_id', '')
    password = auth.get('password', '')
    login_url = system.get('login_url', '')
    api_base = system.get('api_base', '')
    enso_type = system.get('enso_type', '15001')

    # Extra KREMC query parameters
    cid = options.get('cid', '10013000376')
    city_prov_code = options.get('cityProvCode', '11')
    rgn_code = options.get('rgnCode', '11410')
    dong_code = options.get('dongCode', '1141011700')

    session = create_session()

    print(f"\n{'='*60}")
    print(f"[KREMC History] {plant_name} ({start_date} ~ {end_date})")
    print(f"{'='*60}")

    # Login
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
        'Content-Type': 'application/json',
        'Accept': 'application/json, text/plain, */*',
        'Origin': 'https://kremc.kr',
        'Referer': 'https://kremc.kr/login'
    }
    try:
        login_data = {'userId': user_id, 'password': password}
        login_res = session.post(login_url, json=login_data, headers=headers, timeout=10)
        if login_res.status_code != 200:
            print(f" ✗ Login failed: {login_res.status_code}")
            return results

        login_json = login_res.json()
        if login_json.get('status') == 200 or login_json.get('code') == 'S001':
            data = login_json.get('data')
            if isinstance(data, str) and len(data) > 10:
                token = data
            elif isinstance(data, dict):
                token = data.get('token') or data.get('accessToken') or data.get('jwt')
                if not token:
                    print(f" ✗ Token not found")
                    return results
            else:
                print(f" ✗ Invalid login data")
                return results
        else:
            print(f" ✗ Login failed: {login_json.get('message', 'Unknown')}")
            return results
        print(f" ✓ Login successful")

        # API headers
        api_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'application/json',
            'X-Auth-Token': token
        }

        # Iterate over the date range
        current_date = datetime.strptime(start_date, '%Y-%m-%d')
        end_dt = datetime.strptime(end_date, '%Y-%m-%d')
        hourly_url = f"{api_base}/stat/userbyuser/meainDataList"

        while current_date <= end_dt:
            date_str = current_date.strftime('%Y-%m-%d')
            print(f"\n[Processing Date] {date_str}")
            params = {
                'cid': cid,
                'userId': user_id,
                'cityProvCode': city_prov_code,
                'rgnCode': rgn_code,
                'dongCode': dong_code,
                'dateType': 'HH',
                'startGathDtm': date_str,
                'endGathDtm': date_str,
                'ensoTypeCode': enso_type
            }
            try:
                res = session.get(hourly_url, params=params, headers=api_headers, timeout=10)
                if res.status_code == 200:
                    data = res.json()
                    # Response shape: data.userByTimeDataResultDtoList
                    hourly_list = data.get('data', {}).get('userByTimeDataResultDtoList', [])
                    if isinstance(hourly_list, list) and len(hourly_list) > 0:
                        print(f" ✓ Found {len(hourly_list)} hourly records")
                        for item in hourly_list:
                            # gathDtm looks like "00시", "01시", ..., "23시".
                            # Keep only the digits so the unit suffix never
                            # leaks into the timestamp; a missing/null
                            # label falls back to hour "00".
                            time_str = item.get('gathDtm') or ''
                            hour = ''.join(ch for ch in str(time_str) if ch.isdigit()).zfill(2)
                            generation_kwh = safe_float(item.get('dayEnergy', 0))
                            results.append({
                                'plant_id': plant_id,
                                'timestamp': f"{date_str} {hour}:00:00",
                                'generation_kwh': generation_kwh,
                                'current_kw': 0
                            })
                    else:
                        print(f" ⚠ No hourly data for {date_str}")
                else:
                    print(f" ✗ HTTP {res.status_code}")
            except Exception as e:
                # One bad day must not abort the rest of the range.
                print(f" ✗ Error: {e}")
            # Move on to the next day
            current_date += timedelta(days=1)
    except Exception as e:
        print(f" ✗ Overall error: {e}")

    print(f"\n{'='*60}")
    print(f"[Total] Collected {len(results)} hourly records")
    print(f"{'='*60}\n")
    return results
def fetch_history_daily(plant_info, start_date, end_date):
    """
    Collect daily generation history for a KREMC plant, split into monthly requests.

    The range ``[start_date, end_date]`` is cut at month boundaries and each
    piece is requested from ``stat/userbyuser/meainDataList`` with
    ``dateType=DD``.

    Args:
        plant_info: dict with 'id', 'name', 'auth' (user_id, password),
            'system' (login_url, api_base, enso_type) and optional
            'options' (cid, cityProvCode, rgnCode, dongCode).
        start_date: str, first day (YYYY-MM-DD)
        end_date: str, last day, inclusive (YYYY-MM-DD)

    Returns:
        list: [{'plant_id', 'date': 'YYYY-MM-DD', 'generation_kwh',
                'current_kw': 0}, ...]. Whatever was collected so far is
        returned when login fails or an error occurs.
    """
    from datetime import datetime, timedelta
    import calendar

    results = []
    plant_id = plant_info.get('id', 'kremc-05')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    options = plant_info.get('options', {})
    plant_name = plant_info.get('name', '5호기')
    user_id = auth.get('user_id', '')
    password = auth.get('password', '')
    login_url = system.get('login_url', '')
    api_base = system.get('api_base', '')
    enso_type = system.get('enso_type', '15001')

    # Extra KREMC query parameters
    cid = options.get('cid', '10013000376')
    city_prov_code = options.get('cityProvCode', '11')
    rgn_code = options.get('rgnCode', '11410')
    dong_code = options.get('dongCode', '1141011700')

    session = create_session()

    print(f"\n{'='*60}")
    print(f"[KREMC Daily] {plant_name} ({start_date} ~ {end_date}) - Looping by Month")
    print(f"{'='*60}")

    # Login
    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }
    try:
        login_data = {'userId': user_id, 'password': password}
        login_res = session.post(login_url, json=login_data, headers=headers, timeout=10)
        if login_res.status_code != 200:
            print(" ✗ Login failed")
            return results

        login_json = login_res.json()
        data = login_json.get('data')
        # Accept the token as the bare 'data' string or under any of the
        # keys the other KREMC crawlers in this module look at.
        if isinstance(data, str):
            token = data
        elif isinstance(data, dict):
            token = data.get('token') or data.get('accessToken') or data.get('jwt')
        else:
            token = None
        if not token:
            print(" ✗ Token not found")
            return results
        print(" ✓ Login successful")

        api_headers = {
            'User-Agent': 'Mozilla/5.0',
            'Accept': 'application/json',
            'X-Auth-Token': token
        }

        end_date_dt = datetime.strptime(end_date, '%Y-%m-%d')
        daily_url = f"{api_base}/stat/userbyuser/meainDataList"

        # loop_start is the first day of the chunk being processed; the
        # first chunk begins at start_date itself, later ones on day 1.
        loop_start = datetime.strptime(start_date, '%Y-%m-%d')
        while loop_start <= end_date_dt:
            # A chunk ends on the last day of its month, or at end_date
            # if that comes first.
            last_day_of_month = calendar.monthrange(loop_start.year, loop_start.month)[1]
            loop_end = min(loop_start.replace(day=last_day_of_month), end_date_dt)
            s_str = loop_start.strftime('%Y-%m-%d')
            e_str = loop_end.strftime('%Y-%m-%d')
            print(f" [Fetching] {s_str} ~ {e_str} ...", end="", flush=True)
            try:
                params = {
                    'cid': cid,
                    'userId': user_id,
                    'cityProvCode': city_prov_code,
                    'rgnCode': rgn_code,
                    'dongCode': dong_code,
                    'dateType': 'DD',
                    'startGathDtm': s_str,
                    'endGathDtm': e_str,
                    'ensoTypeCode': enso_type
                }
                res = session.get(daily_url, params=params, headers=api_headers, timeout=15)
                if res.status_code == 200:
                    data = res.json()
                    daily_list = data.get('data', {}).get('userByTimeDataResultDtoList', [])
                    if daily_list:
                        count = 0
                        for item in daily_list:
                            # gathDtm looks like "2026-01-01"; tolerate a
                            # null value, stray whitespace and anything
                            # after the date part.
                            date_str = str(item.get('gathDtm') or '').strip()[:10]
                            generation_kwh = safe_float(item.get('dayEnergy', 0))
                            results.append({
                                'plant_id': plant_id,
                                'date': date_str,
                                'generation_kwh': generation_kwh,
                                'current_kw': 0
                            })
                            count += 1
                        print(f" OK ({count} days)")
                    else:
                        print(" No data")
                else:
                    print(f" HTTP {res.status_code}")
            except Exception as e:
                # One bad chunk must not abort the rest of the range.
                print(f" Error: {e}")
            # The next chunk starts the day after this one ends.
            loop_start = loop_end + timedelta(days=1)
    except Exception as e:
        print(f" ✗ Overall Error: {e}")

    print(f"\n[Total] Collected {len(results)} daily records\n")
    return results
def fetch_history_monthly(plant_info, start_month, end_month):
    """
    Collect monthly generation totals for a KREMC plant.

    KREMC does not support ``dateType=MM`` (it answers with a 500 error), so
    each month is fetched as daily data (``dateType=DD``) and summed here.
    Months before the plant's 'start_date' are skipped.

    Args:
        plant_info: dict with 'id', 'name', optional 'start_date'
            (YYYY-MM-DD), 'auth' (user_id, password), 'system' (login_url,
            api_base, enso_type) and optional 'options' (cid, cityProvCode,
            rgnCode, dongCode).
        start_month: str, first month (YYYY-MM)
        end_month: str, last month, inclusive (YYYY-MM)

    Returns:
        list: [{'plant_id', 'month': 'YYYY-MM', 'generation_kwh'}, ...];
        months without data are omitted. Empty when login fails.

    Raises:
        ValueError: if the effective start month or end_month is not YYYY-MM.
        Exception: whatever the session raises for the login request; the
            login call is not guarded here.
    """
    from datetime import datetime
    from dateutil.relativedelta import relativedelta
    import calendar

    results = []
    plant_id = plant_info.get('id', 'kremc-05')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    options = plant_info.get('options', {})
    plant_name = plant_info.get('name', '5호기')

    # Never start before the plant went into operation. YYYY-MM strings
    # compare correctly as plain text.
    plant_start_date = plant_info.get('start_date', '2018-06-28')
    plant_start_month = plant_start_date[:7]  # YYYY-MM
    if start_month < plant_start_month:
        actual_start = plant_start_month
        print(f" 발전소 가동일({plant_start_date}) 이후부터 수집: {actual_start}")
    else:
        actual_start = start_month

    user_id = auth.get('user_id', '')
    password = auth.get('password', '')
    login_url = system.get('login_url', '')
    api_base = system.get('api_base', '')
    enso_type = system.get('enso_type', '15001')

    # Extra KREMC query parameters
    cid = options.get('cid', '10013000376')
    city_prov_code = options.get('cityProvCode', '11')
    rgn_code = options.get('rgnCode', '11410')
    dong_code = options.get('dongCode', '1141011700')

    session = create_session()

    print(f"\n{'='*60}")
    print(f"[KREMC Monthly] {plant_name} ({actual_start} ~ {end_month})")
    print(f"{'='*60}")

    # Login
    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }
    login_data = {'userId': user_id, 'password': password}
    login_res = session.post(login_url, json=login_data, headers=headers, timeout=10)
    if login_res.status_code != 200:
        print(" ✗ Login failed")
        return results

    login_json = login_res.json()
    data = login_json.get('data')
    # Accept the token as the bare 'data' string or under any of the keys
    # the other KREMC crawlers in this module look at.
    if isinstance(data, str):
        token = data
    elif isinstance(data, dict):
        token = data.get('token') or data.get('accessToken') or data.get('jwt')
    else:
        token = None
    if not token:
        print(" ✗ Token not found")
        return results
    print(" ✓ Login successful")

    api_headers = {
        'User-Agent': 'Mozilla/5.0',
        'Accept': 'application/json',
        'X-Auth-Token': token
    }

    current_month = datetime.strptime(actual_start, '%Y-%m')
    end_month_dt = datetime.strptime(end_month, '%Y-%m')
    daily_url = f"{api_base}/stat/userbyuser/meainDataList"

    while current_month <= end_month_dt:
        month_str = current_month.strftime('%Y-%m')
        # First and last calendar day of this month.
        first_day = current_month.strftime('%Y-%m-01')
        days_in_month = calendar.monthrange(current_month.year, current_month.month)[1]
        last_day = current_month.replace(day=days_in_month).strftime('%Y-%m-%d')
        try:
            # Fetch daily data (dateType=DD) and add it up.
            params = {
                'cid': cid,
                'userId': user_id,
                'cityProvCode': city_prov_code,
                'rgnCode': rgn_code,
                'dongCode': dong_code,
                'dateType': 'DD',
                'startGathDtm': first_day,
                'endGathDtm': last_day,
                'ensoTypeCode': enso_type
            }
            res = session.get(daily_url, params=params, headers=api_headers, timeout=10)
            if res.status_code == 200:
                data = res.json()
                # Response shape: data.userByTimeDataResultDtoList
                daily_list = data.get('data', {}).get('userByTimeDataResultDtoList', [])
                if isinstance(daily_list, list) and len(daily_list) > 0:
                    monthly_total = sum(safe_float(item.get('dayEnergy', 0)) for item in daily_list)
                    results.append({
                        'plant_id': plant_id,
                        'month': month_str,
                        'generation_kwh': monthly_total
                    })
                    print(f"{month_str}: {monthly_total:.1f}kWh (from {len(daily_list)} days)")
                else:
                    # Make skipped months visible instead of dropping them silently.
                    print(f" ⚠ No data for {month_str}")
            else:
                print(f" ✗ HTTP {res.status_code} for {month_str}")
        except Exception as e:
            # One bad month must not abort the rest of the range.
            print(f" ✗ Error for {month_str}: {e}")
        # Move on to the next month
        current_month += relativedelta(months=1)

    print(f"[Total] Collected {len(results)} monthly records\n")
    return results

618
crawlers/nrems.py Normal file
View File

@ -0,0 +1,618 @@
# ==========================================
# crawlers/nrems.py - NREMS 크롤러 (1,2,3,4,9호기)
# ==========================================
import requests
import json
import re
from datetime import datetime
from .base import safe_float, create_session, format_result
def _get_inverter_sums(session, pscode, system_config):
"""
1, 2호기 인버터별 일일 발전량 추출 (JSON API 사용)
"""
try:
today_str = datetime.now().strftime('%Y-%m-%d')
month_str = datetime.now().strftime('%Y-%m')
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
'Referer': f'http://www.nrems.co.kr/v2/local/comp/cp_inv_time.php?pscode={pscode}'
}
data = {
'act': 'getList',
's_day': today_str,
's_date': today_str,
'e_date': today_str,
's_mon': month_str,
'e_mon': month_str,
'pscode': pscode,
'dispType': 'time'
}
inv_proc_url = system_config.get('inv_proc_url', '')
res = session.post(inv_proc_url, data=data, headers=headers, timeout=10)
if res.status_code == 200:
try:
json_data = res.json()
invlist = json_data.get('invlist', [])
sum_1 = 0.0
sum_2 = 0.0
for inv in invlist:
tidx = str(inv.get('tidx', ''))
sum_pw = safe_float(inv.get('sumPw'))
if tidx == '1':
sum_1 = sum_pw
elif tidx == '2':
sum_2 = sum_pw
if sum_1 > 0 or sum_2 > 0:
print(f" [API] 인버터 합계 추출 성공! (인버터1: {sum_1} kWh / 인버터2: {sum_2} kWh)")
return sum_1, sum_2
else:
print(f" ⚠️ API 응답에 인버터 데이터 없음")
return 0.0, 0.0
except json.JSONDecodeError:
print(f" ⚠️ JSON 파싱 실패")
return 0.0, 0.0
else:
print(f" ⚠️ API 응답 오류: {res.status_code}")
return 0.0, 0.0
except Exception as e:
print(f" [에러] {e}")
return 0.0, 0.0
def fetch_data(plant_info):
    """
    Collect the current reading of an NREMS plant.

    Args:
        plant_info: {
            'id': 'nrems-03',  # unique DB id (not used when is_split is set)
            'name': '...',
            'type': 'nrems',
            'auth': {'pscode': '...'},
            'options': {'is_split': True/False},
            'system': {'api_url': '...', 'inv_proc_url': '...'},
            'company_name': '...'
        }
        'auth' and 'options' may be omitted; they then default to an empty
        pscode and is_split=False.

    Returns:
        list: [{'id': '...', 'name': '...', 'kw': 10.5, 'today': 100.0, 'status': '...'}]
        - is_split: two records with the fixed ids 'nrems-01' and 'nrems-02'.
        - otherwise: one record for the configured plant id.
        - non-200 or non-JSON response: empty list.
        - unexpected error: one '🔴 오류' record (non-split only).
    """
    results = []

    # Extract settings
    plant_id = plant_info.get('id', '')  # unique DB id
    pscode = plant_info.get('auth', {}).get('pscode', '')
    is_split = plant_info.get('options', {}).get('is_split', False)
    system_config = plant_info.get('system', {})
    company_name = plant_info.get('company_name', '태양과바람')
    plant_name = plant_info.get('name', '')

    session = create_session()
    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }

    try:
        # Main data request
        api_url = system_config.get('api_url', '')
        res = session.post(api_url, data={'pscode': pscode}, headers=headers, timeout=10)
        if res.status_code != 200:
            return results
        try:
            data = res.json()
        except ValueError:
            # Body is not JSON (JSON decode errors subclass ValueError).
            return results

        # Locate this plant's entry in the response
        ps_list = data.get('ps_status')
        target_data = None
        if isinstance(ps_list, list):
            for item in ps_list:
                code_in_res = item.get('pscode')
                wmu_in_res = item.get('WMU_CODE')
                # Case-insensitive match on either identifier
                if (code_in_res and code_in_res.lower() == pscode.lower()) or \
                        (wmu_in_res and wmu_in_res.lower() == pscode.lower()):
                    target_data = item
                    break
            if not target_data and len(ps_list) > 0:
                print(f" ⚠️ Target pscode '{pscode}' not found in response. Available: {[i.get('pscode') for i in ps_list]}")
                target_data = ps_list[0]  # Fallback: first entry in the list
                print(f" ⚠️ Using fallback: {target_data.get('pscode')}")
        elif isinstance(ps_list, dict):
            target_data = ps_list
        if not target_data:
            target_data = {}

        total_kw = safe_float(target_data.get('KW'))
        total_today = safe_float(target_data.get('TDayKWH'))
        inverters = data.get('ivt_value', [])

        if is_split:
            # Case A: one site reported as two units (units 1 and 2)
            real_sum_1, real_sum_2 = _get_inverter_sums(session, pscode, system_config)
            kw_1 = safe_float(inverters[0].get('KW')) if len(inverters) >= 1 else 0.0
            kw_2 = safe_float(inverters[1].get('KW')) if len(inverters) >= 2 else 0.0

            if (real_sum_1 + real_sum_2) > 0:
                today_1 = real_sum_1
                today_2 = real_sum_2
            else:
                # Backup: split the site total by the current kW ratio, or
                # evenly when both inverters read zero.
                print(" ⚠️ 백업 로직(비율) 가동")
                inv_total = kw_1 + kw_2
                if inv_total > 0:
                    today_1 = total_today * (kw_1 / inv_total)
                    today_2 = total_today * (kw_2 / inv_total)
                else:
                    today_1 = total_today / 2
                    today_2 = total_today / 2

            # IMPORTANT: units 1 and 2 always use hard-coded ids
            results.append({
                'id': 'nrems-01',
                'name': f'{company_name} 1호기',
                'kw': kw_1,
                'today': round(today_1, 2),
                'status': "🟢 정상" if kw_1 > 0 else "💤 대기"
            })
            results.append({
                'id': 'nrems-02',
                'name': f'{company_name} 2호기',
                'kw': kw_2,
                'today': round(today_2, 2),
                'status': "🟢 정상" if kw_2 > 0 else "💤 대기"
            })
        else:
            # Case B: single-unit plants use the id from the config
            results.append({
                'id': plant_id,
                'name': f'{company_name} {plant_name}',
                'kw': total_kw,
                'today': total_today,
                'status': "🟢 정상" if total_kw > 0 else "💤 대기"
            })
    except Exception as e:
        print(f"❌ NREMS {plant_name} 오류: {e}")
        if not is_split:
            results.append({
                'id': plant_id,
                'name': f'{company_name} {plant_name}',
                'kw': 0.0,
                'today': 0.0,
                'status': '🔴 오류'
            })
    return results
def fetch_history_hourly(plant_info, start_date, end_date):
    """
    Collect hourly generation history for an NREMS plant.

    One request is sent per day in ``[start_date, end_date]``. Split plants
    (``options.is_split``) yield two records per hour, with the fixed ids
    'nrems-01' and 'nrems-02'; other plants yield one record per hour under
    the configured plant id.

    Args:
        plant_info: {
            'id': 'nrems-03',
            'name': '...',
            'type': 'nrems',
            'auth': {'pscode': '...'},
            'options': {'is_split': True/False},
            'system': {'api_url': '...', 'inv_proc_url': '...'},
            'company_name': '...'
        }
        start_date: str, first day (YYYY-MM-DD)
        end_date: str, last day, inclusive (YYYY-MM-DD)

    Returns:
        list: [{
            'plant_id': 'nrems-03',
            'timestamp': '2026-01-15 14:00:00',
            'generation_kwh': 123.5,
            'current_kw': 0
        }, ...]

    Raises:
        KeyError: if plant_info has no 'auth' or 'options' entry.
        ValueError: if start_date or end_date is not YYYY-MM-DD.
    """
    from datetime import datetime, timedelta

    collected = []

    # Extract settings
    plant_id = plant_info.get('id', '')
    pscode = plant_info['auth'].get('pscode', '')
    is_split = plant_info['options'].get('is_split', False)
    plant_name = plant_info.get('name', '')

    # Date range
    first_day = datetime.strptime(start_date, '%Y-%m-%d')
    final_day = datetime.strptime(end_date, '%Y-%m-%d')

    session = create_session()

    banner = '=' * 60
    print(f"\n{banner}")
    print(f"[NREMS Hourly] {plant_name} ({start_date} ~ {end_date})")
    print(banner)

    # Endpoint, Referer and response key depend only on the plant kind.
    if is_split:
        # Split plants: cp_inv_proc.php with dispType=time, rows under 'pwdata'
        endpoint = 'http://www.nrems.co.kr/v2/local/proc/cp_inv_proc.php'
        referer = f'http://www.nrems.co.kr/v2/local/comp/cp_inv_time.php?pscode={pscode}'
        records_key = 'pwdata'
    else:
        # Other plants: pl_time_proc.php with act=empty, rows under 'pdata'
        endpoint = 'http://www.nrems.co.kr/v2/local/proc/pl_time_proc.php'
        referer = f'http://www.nrems.co.kr/v2/local/plant/pl_time.php?pscode={pscode}'
        records_key = 'pdata'

    for day_offset in range((final_day - first_day).days + 1):
        date_str = (first_day + timedelta(days=day_offset)).strftime('%Y-%m-%d')
        print(f"\n[Processing Date] {date_str}")

        request_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'X-Requested-With': 'XMLHttpRequest',
            'Referer': referer
        }
        if is_split:
            form = {
                'act': 'getList',
                's_day': date_str,
                's_date': date_str,
                'e_date': date_str,
                's_mon': date_str[:7],
                'e_mon': date_str[:7],
                'pscode': pscode,
                'dispType': 'time'
            }
        else:
            form = {
                'act': 'empty',
                's_date': date_str,
                'pscode': pscode
            }

        try:
            response = session.post(endpoint, data=form, headers=request_headers, timeout=10)
            if response.status_code != 200:
                print(f" ✗ HTTP {response.status_code}")
                continue

            hourly_records = response.json().get(records_key, [])
            if not hourly_records:
                print(f" ⚠ No hourly data for {date_str}")
                continue

            print(f" ✓ Found {len(hourly_records)} hourly records")
            for row in hourly_records:
                if is_split:
                    # Split rows carry DATE (the hour) plus PW1 / PW2
                    hour = row.get('DATE', '00')
                    first_unit = safe_float(row.get('PW1', 0))
                    second_unit = safe_float(row.get('PW2', 0))
                    stamp = f"{date_str} {str(hour).zfill(2)}:00:00"
                    collected.extend(
                        {
                            'plant_id': unit_id,
                            'timestamp': stamp,
                            'generation_kwh': unit_value,
                            'current_kw': 0
                        }
                        for unit_id, unit_value in (
                            ('nrems-01', first_unit),
                            ('nrems-02', second_unit),
                        )
                    )
                else:
                    # Other rows carry TIME ("14:00") plus INV
                    hour = row.get('TIME', '00:00').split(':')[0]
                    generation_kwh = safe_float(row.get('INV', 0))
                    collected.append({
                        'plant_id': plant_id,
                        'timestamp': f"{date_str} {str(hour).zfill(2)}:00:00",
                        'generation_kwh': generation_kwh,
                        'current_kw': 0
                    })
            print(f" → Collected {len(hourly_records)} records")
        except Exception as e:
            # A failing day is reported and skipped; the loop moves on.
            print(f" ✗ Error: {e}")

    print(f"\n{banner}")
    print(f"[Total] Collected {len(collected)} hourly records")
    print(f"{banner}\n")
    return collected
def fetch_history_daily(plant_info, start_date, end_date):
    """
    Collect past daily generation data for an NREMS plant, one request per calendar month.

    Args:
        plant_info: dict describing the plant. Reads 'id', 'name',
            plant_info['auth']['pscode'] and plant_info['options']['is_split'].
        start_date: str, first day to collect (YYYY-MM-DD).
        end_date: str, last day to collect (YYYY-MM-DD), inclusive.

    Returns:
        list: [{'plant_id': '...', 'date': '2026-01-15', 'generation_kwh': 123.5}, ...]
            For split plants each day yields two records, one for 'nrems-01'
            (PW1) and one for 'nrems-02' (PW2).

    Raises:
        KeyError: if plant_info has no 'auth' or 'options' entry.
        ValueError: if start_date / end_date do not match YYYY-MM-DD.
    """
    from datetime import datetime, timedelta
    import calendar

    results = []
    # Extract settings
    plant_id = plant_info.get('id', '')
    pscode = plant_info['auth'].get('pscode', '')
    is_split = plant_info['options'].get('is_split', False)
    plant_name = plant_info.get('name', '')
    session = create_session()

    print(f"\n{'='*60}")
    print(f"[NREMS Daily] {plant_name} ({start_date} ~ {end_date}) - Looping by Month")
    print(f"{'='*60}")

    start_dt = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')
    current_dt = start_dt
    while current_dt <= end_dt:
        # Each chunk runs from current_dt to the end of its month,
        # clamped to the overall end date.
        last_day_of_month = calendar.monthrange(current_dt.year, current_dt.month)[1]
        chunk_end_dt = min(current_dt.replace(day=last_day_of_month), end_dt)
        s_date_str = current_dt.strftime('%Y-%m-%d')
        e_date_str = chunk_end_dt.strftime('%Y-%m-%d')
        print(f" [Fetching] {s_date_str} ~ {e_date_str} ...", end="", flush=True)

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'X-Requested-With': 'XMLHttpRequest'
        }
        try:
            if is_split:
                # Split plants (units 1, 2): cp_inv_proc.php with dispType=day
                url = 'http://www.nrems.co.kr/v2/local/proc/cp_inv_proc.php'
                headers['Referer'] = f'http://www.nrems.co.kr/v2/local/comp/cp_inv_day.php?pscode={pscode}'
                payload = {
                    'act': 'getList',
                    's_day': s_date_str,  # s_day carries the chunk start date
                    's_date': s_date_str,
                    'e_date': e_date_str,
                    's_mon': s_date_str[:7],
                    'e_mon': e_date_str[:7],
                    'pscode': pscode,
                    'dispType': 'day'
                }
            else:
                # Single plants (units 3, 4, 9): pl_day_proc.php with an s_day/e_day range
                url = 'http://www.nrems.co.kr/v2/local/proc/pl_day_proc.php'
                headers['Referer'] = f'http://www.nrems.co.kr/v2/local/plant/pl_day.php?pscode={pscode}'
                payload = {
                    'act': 'empty',
                    's_day': s_date_str,
                    'e_day': e_date_str,
                    'pscode': pscode
                }
            response = session.post(url, data=payload, headers=headers, timeout=15)
            if response.status_code == 200:
                try:
                    data = response.json()
                    # Split plants answer under 'pwdata', single plants under 'pdata'.
                    daily_records = data.get('pwdata' if is_split else 'pdata', [])
                    if daily_records:
                        count = 0
                        for day_data in daily_records:
                            date_raw = day_data.get('DATE', '')
                            if not date_raw:
                                continue
                            # Normalize short dates: "12-28" -> "2025-12-28".
                            clean_date = date_raw
                            if '-' in date_raw and len(date_raw.split('-')[0]) <= 2:
                                mm, dd = date_raw.split('-')
                                # A chunk never crosses a month boundary, so the
                                # loop's own year is the right year for every row
                                # (the wall-clock year would be wrong when
                                # back-filling December in January).
                                year = current_dt.year
                                clean_date = f"{year}-{mm.zfill(2)}-{dd.zfill(2)}"
                            if is_split:
                                inv1_gen = safe_float(day_data.get('PW1', 0))
                                inv2_gen = safe_float(day_data.get('PW2', 0))
                                results.append({'plant_id': 'nrems-01', 'date': clean_date, 'generation_kwh': inv1_gen})
                                results.append({'plant_id': 'nrems-02', 'date': clean_date, 'generation_kwh': inv2_gen})
                            else:
                                generation_kwh = safe_float(day_data.get('INV', 0))
                                results.append({'plant_id': plant_id, 'date': clean_date, 'generation_kwh': generation_kwh})
                            count += 1
                        print(f" OK ({count} days)")
                    else:
                        print(f" No data")
                except Exception as json_err:
                    print(f" JSON Error: {json_err}")
            else:
                print(f" HTTP {response.status_code}")
        except Exception as e:
            print(f" Error: {e}")
        # Jump to the 1st of the next month (day 1 + 32 days always lands in it).
        current_dt = (current_dt.replace(day=1) + timedelta(days=32)).replace(day=1)

    print(f"\n[Total] Collected {len(results)} daily records\n")
    return results
def fetch_history_monthly(plant_info, start_month, end_month):
    """
    Collect past monthly generation data for an NREMS plant in a single request.

    Args:
        plant_info: dict describing the plant. Reads 'id', 'name',
            plant_info['auth']['pscode'] and plant_info['options']['is_split'].
        start_month: str, first month (YYYY-MM).
        end_month: str, last month (YYYY-MM).

    Returns:
        list: [{'plant_id': '...', 'month': '2026-01', 'generation_kwh': 12345.6}, ...]
            The 'month' value is the response's DATE field passed through as-is.
            Split plants yield two records per month ('nrems-01' / 'nrems-02').
    """
    collected = []
    # Extract settings
    unit_id = plant_info.get('id', '')
    ps_code = plant_info['auth'].get('pscode', '')
    split_mode = plant_info['options'].get('is_split', False)
    display_name = plant_info.get('name', '')
    http = create_session()

    banner = '=' * 60
    print(f"\n{banner}")
    print(f"[NREMS Monthly] {display_name} ({start_month} ~ {end_month})")
    print(banner)

    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'X-Requested-With': 'XMLHttpRequest'
    }
    try:
        if split_mode:
            # Split plants (units 1, 2): cp_inv_proc.php with dispType=mon, rows under 'pwdata'
            endpoint = 'http://www.nrems.co.kr/v2/local/proc/cp_inv_proc.php'
            request_headers['Referer'] = f'http://www.nrems.co.kr/v2/local/comp/cp_inv_month.php?pscode={ps_code}'
            form = {
                'act': 'getList',
                's_day': f"{end_month}-01",
                's_date': f"{start_month}-01",
                'e_date': f"{end_month}-01",
                's_mon': start_month,
                'e_mon': end_month,
                'pscode': ps_code,
                'dispType': 'mon'
            }
            records_key = 'pwdata'
        else:
            # Single plants (units 3, 4, 9): pl_month_proc.php with s_date/e_date (YYYY-MM), rows under 'pdata'
            endpoint = 'http://www.nrems.co.kr/v2/local/proc/pl_month_proc.php'
            request_headers['Referer'] = f'http://www.nrems.co.kr/v2/local/plant/pl_month.php?pscode={ps_code}'
            form = {
                'act': 'empty',
                's_date': start_month,
                'e_date': end_month,
                'pscode': ps_code
            }
            records_key = 'pdata'

        reply = http.post(endpoint, data=form, headers=request_headers, timeout=15)
        if reply.status_code != 200:
            print(f" ✗ HTTP {reply.status_code}")
        else:
            rows = reply.json().get(records_key, [])
            if not rows:
                print(f" ⚠ No monthly data found")
            else:
                print(f" ✓ Found {len(rows)} monthly records")
                for row in rows:
                    label = row.get('DATE', '')
                    if not label:
                        # Rows without a month label cannot be stored.
                        continue
                    if split_mode:
                        # PW1 / PW2 hold the two units' totals.
                        first_unit = safe_float(row.get('PW1', 0))
                        second_unit = safe_float(row.get('PW2', 0))
                        collected.extend((
                            {'plant_id': 'nrems-01', 'month': label, 'generation_kwh': first_unit},
                            {'plant_id': 'nrems-02', 'month': label, 'generation_kwh': second_unit},
                        ))
                        print(f"{label}: Unit1={first_unit}kWh, Unit2={second_unit}kWh")
                    else:
                        # INV holds the plant's single total.
                        total = safe_float(row.get('INV', 0))
                        collected.append({'plant_id': unit_id, 'month': label, 'generation_kwh': total})
                        print(f"{label}: {total}kWh")
                print(f" → Collected {len(rows)} records")
    except Exception as e:
        print(f" ✗ Error: {e}")

    print(f"\n[Total] Collected {len(collected)} monthly records\n")
    return collected

430
crawlers/sun_wms.py Normal file
View File

@ -0,0 +1,430 @@
# ==========================================
# crawlers/sun_wms.py - Sun-WMS 크롤러 (6호기)
# HTML 테이블 파싱 방식
# ==========================================
import requests
import re
import time
from .base import create_session, safe_float
def fetch_data(plant_info):
    """
    Collect the current output and today's generation for a Sun-WMS plant.

    Logs in, fetches the real-time page and scrapes the ``cur_power`` and
    ``today_power`` element values out of the returned HTML.

    Args:
        plant_info: dict describing the plant. Reads 'id', 'name',
            'company_name', 'auth' ('payload_id', 'payload_pw') and
            'system' ('login_url', 'data_url').

    Returns:
        list: [{'id', 'name', 'kw', 'today', 'status'}] with one element, or
            [] when the login or the data request fails.
    """
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    company_name = plant_info.get('company_name', '태양과바람')
    plant_name = plant_info.get('name', '6호기')
    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')
    data_url = system.get('data_url', '')
    session = create_session()
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
        'Referer': 'http://tb6.sun-wms.com/public/main/login.php',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }

    # 1. Login
    login_data = {
        'act': 'loginChk',
        'user_id': payload_id,
        'user_pass': payload_pw
    }
    try:
        # Without a timeout a stalled server would block the crawler forever.
        res = session.post(login_url, data=login_data, headers=headers, timeout=10)
        if res.status_code != 200:
            return []
    except Exception as e:
        print(f"{plant_name} 접속 에러: {e}")
        return []

    # 2. Data request
    try:
        # Millisecond timestamp in the query string acts as a cache-buster.
        timestamp = int(time.time() * 1000)
        res = session.get(f"{data_url}?time={timestamp}", headers=headers, timeout=10)
        if res.status_code != 200:
            # An error page has no power fields; parsing it would report a
            # misleading 0 kW / idle reading instead of a failure.
            print(f"{plant_name} 에러: HTTP {res.status_code}")
            return []
        res.encoding = 'euc-kr'
        content = res.text
        match_kw = re.search(r"id=['\"]cur_power['\"].*?value=['\"]([^'\"]+)['\"]", content)
        curr_kw = float(match_kw.group(1)) if match_kw else 0.0
        match_today = re.search(r"id=['\"]today_power['\"].*?value=['\"]([^'\"]+)['\"]", content)
        today_kwh = float(match_today.group(1)) if match_today else 0.0
        status = "🟢 정상" if curr_kw > 0 else "💤 대기"
        return [{
            'id': plant_id,
            'name': f'{company_name} {plant_name}',
            'kw': curr_kw,
            'today': today_kwh,
            'status': status
        }]
    except Exception as e:
        print(f"{plant_name} 에러: {e}")
        return []
def fetch_history_hourly(plant_info, start_date, end_date):
    """
    Collect past hourly generation data for a Sun-WMS plant, one request per day.

    Endpoint: /public/statics/statics.php (responds with an HTML table).
    Query: tab01=0&tab02=1&tab03=2&tord=1&s_day=YYYY-MM-DD

    Args:
        plant_info: dict describing the plant. Reads 'id', 'name',
            'auth' ('payload_id', 'payload_pw') and 'system'
            ('login_url', 'base_url', 'statics_url').
        start_date: str, first day (YYYY-MM-DD).
        end_date: str, last day (YYYY-MM-DD), inclusive.

    Returns:
        list: [{'plant_id', 'timestamp': 'YYYY-MM-DD HH:00:00',
            'generation_kwh', 'current_kw': 0}, ...]; empty if login fails.
    """
    from datetime import datetime, timedelta

    results = []
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '6호기')
    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')
    # Fall back to the conventional path under base_url when statics_url is not configured.
    base_url = system.get('base_url', '')
    statics_url = system.get('statics_url', f"{base_url}/public/statics/statics.php")
    session = create_session()

    print(f"\n{'='*60}")
    print(f"[Sun-WMS History] {plant_name} ({start_date} ~ {end_date})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }
    login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw}
    try:
        # Bounded wait so a stalled login cannot hang the whole back-fill.
        res = session.post(login_url, data=login_data, headers=headers, timeout=10)
        # NOTE(review): success is judged by HTTP 200 only; confirm the site
        # does not answer 200 for rejected credentials.
        if res.status_code == 200:
            print(" ✓ Login successful")
        else:
            print(" ✗ Login failed")
            return results
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    # Compiled once; both patterns are reused for every day in the range.
    tbody_re = re.compile(r'<tbody>(.*?)</tbody>', re.DOTALL)
    row_re = re.compile(r'<tr>\s*<td>(\d{2}):00</td>\s*<td>([\d.]+)</td>\s*</tr>')

    # Iterate over days
    current_date = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')
    while current_date <= end_dt:
        date_str = current_date.strftime('%Y-%m-%d')
        params = {
            'tab01': '0',
            'tab02': '1',
            'tab03': '2',
            'tord': '1',
            's_day': date_str
        }
        try:
            res = session.get(statics_url, params=params, headers=headers, timeout=10)
            res.encoding = 'euc-kr'
            if res.status_code == 200:
                # Only rows inside the first <tbody> are parsed.
                tbody_match = tbody_re.search(res.text)
                if tbody_match:
                    matches = row_re.findall(tbody_match.group(1))
                    if matches:
                        print(f" ✓ Found {len(matches)} hourly records")
                        for hour, kwh in matches:
                            results.append({
                                'plant_id': plant_id,
                                'timestamp': f"{date_str} {hour}:00:00",
                                'generation_kwh': safe_float(kwh),
                                'current_kw': 0
                            })
                    else:
                        print(f" ⚠ No data for {date_str}")
                else:
                    print(f" ⚠ No tbody found for {date_str}")
            else:
                print(f" ✗ HTTP {res.status_code}")
        except Exception as e:
            print(f" ✗ Error: {e}")
        current_date += timedelta(days=1)

    print(f"\n{'='*60}")
    print(f"[Total] Collected {len(results)} hourly records")
    print(f"{'='*60}\n")
    return results
def fetch_history_daily(plant_info, start_date, end_date):
    """
    Collect past daily generation data for a Sun-WMS plant, split into per-month requests.

    Endpoint: /public/statics/statics.php (responds with an HTML table).
    Query: tab01=0&tab02=2&tab03=2&tord=2&s_day=YYYY-MM-DD&e_day=YYYY-MM-DD

    Args:
        plant_info: dict describing the plant. Reads 'id', 'name',
            'auth' ('payload_id', 'payload_pw') and 'system'
            ('login_url', 'base_url', 'statics_url').
        start_date: str, first day (YYYY-MM-DD).
        end_date: str, last day (YYYY-MM-DD), inclusive.

    Returns:
        list: [{'plant_id', 'date': 'YYYY-MM-DD', 'generation_kwh',
            'current_kw': 0}, ...]; empty if login fails.
    """
    from datetime import datetime, timedelta
    import calendar

    results = []
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '6호기')
    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')
    base_url = system.get('base_url', '')
    statics_url = system.get('statics_url', f"{base_url}/public/statics/statics.php")
    session = create_session()

    print(f"\n{'='*60}")
    print(f"[Sun-WMS Daily] {plant_name} ({start_date} ~ {end_date}) - Looping by Month")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }

    # Login
    try:
        login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw}
        # Bounded wait so a stalled login cannot hang the whole back-fill.
        res = session.post(login_url, data=login_data, headers=headers, timeout=10)
        if res.status_code == 200:
            print(" ✓ Login successful")
        else:
            print(" ✗ Login failed")
            return results
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    # Compiled once; reused for every monthly chunk.
    tbody_re = re.compile(r'<tbody>(.*?)</tbody>', re.DOTALL)
    row_re = re.compile(r'<tr>\s*<td>(\d{4}-\d{2}-\d{2})</td>\s*<td>([\d.]+)</td>')

    # Walk the range one calendar month at a time
    start_dt = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')
    loop_start = start_dt
    while loop_start <= end_dt:
        # Chunk ends on the last day of loop_start's month, clamped to end_dt.
        last_day_of_month = calendar.monthrange(loop_start.year, loop_start.month)[1]
        loop_end = min(loop_start.replace(day=last_day_of_month), end_dt)
        s_str = loop_start.strftime('%Y-%m-%d')
        e_str = loop_end.strftime('%Y-%m-%d')
        print(f" [Fetching] {s_str} ~ {e_str} ...", end="", flush=True)
        params = {
            'tab01': '0',
            'tab02': '2',
            'tab03': '2',
            'tord': '2',
            's_day': s_str,
            'e_day': e_str
        }
        try:
            res = session.get(statics_url, params=params, headers=headers, timeout=15)
            res.encoding = 'euc-kr'
            if res.status_code == 200:
                tbody_match = tbody_re.search(res.text)
                if tbody_match:
                    matches = row_re.findall(tbody_match.group(1))
                    if matches:
                        for date_str, kwh in matches:
                            results.append({
                                'plant_id': plant_id,
                                'date': date_str,
                                'generation_kwh': safe_float(kwh),
                                'current_kw': 0
                            })
                        print(f" OK ({len(matches)} days)")
                    else:
                        print(" No data")
                else:
                    print(" No tbody")
            else:
                print(f" HTTP {res.status_code}")
        except Exception as e:
            print(f" Error: {e}")
        # Next chunk starts the day after this one ended.
        loop_start = loop_end + timedelta(days=1)

    print(f"\n[Total] Collected {len(results)} daily records\n")
    return results
def fetch_history_monthly(plant_info, start_month, end_month):
    """
    Collect past monthly generation data for a Sun-WMS plant.

    Endpoint: /public/statics/statics.php (responds with an HTML table).
    Monthly figures are produced by requesting each month's daily table and
    summing its rows.

    Args:
        plant_info: dict describing the plant. Reads 'id', 'name',
            'start_date' (YYYY-MM-DD, defaults to '2019-12-30'),
            'auth' ('payload_id', 'payload_pw') and 'system'
            ('login_url', 'base_url', 'statics_url').
        start_month: str, first month (YYYY-MM). Months before the plant's
            start_date month are skipped.
        end_month: str, last month (YYYY-MM), inclusive.

    Returns:
        list: [{'plant_id', 'month': 'YYYY-MM', 'generation_kwh'}, ...];
            months with no parsable rows are omitted. Empty if login fails.
    """
    from datetime import datetime
    from dateutil.relativedelta import relativedelta

    results = []
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '6호기')

    # Never request months before the plant went live.
    plant_start_date = plant_info.get('start_date', '2019-12-30')
    plant_start_month = plant_start_date[:7]  # YYYY-MM
    # Zero-padded YYYY-MM strings order correctly under plain string comparison.
    if start_month < plant_start_month:
        actual_start = plant_start_month
        print(f" 발전소 가동일({plant_start_date}) 이후부터 수집: {actual_start}")
    else:
        actual_start = start_month

    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')
    base_url = system.get('base_url', '')
    statics_url = system.get('statics_url', f"{base_url}/public/statics/statics.php")
    session = create_session()

    print(f"\n{'='*60}")
    print(f"[Sun-WMS Monthly] {plant_name} ({actual_start} ~ {end_month})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }
    login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw}
    try:
        # Bounded wait so a stalled login cannot hang the whole back-fill.
        res = session.post(login_url, data=login_data, headers=headers, timeout=10)
        if res.status_code == 200:
            print(" ✓ Login successful")
        else:
            print(" ✗ Login failed")
            return results
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    # Compiled once; reused for every month.
    tbody_re = re.compile(r'<tbody>(.*?)</tbody>', re.DOTALL)
    row_re = re.compile(r'<tr>\s*<td>(\d{4}-\d{2}-\d{2})</td>\s*<td>([\d.]+)</td>')

    # Iterate month by month
    current_month = datetime.strptime(actual_start, '%Y-%m')
    end_month_dt = datetime.strptime(end_month, '%Y-%m')
    while current_month <= end_month_dt:
        month_str = current_month.strftime('%Y-%m')
        first_day = current_month.strftime('%Y-%m-01')
        # First of next month minus one day; relativedelta rolls the year
        # over, so December needs no special case.
        last_day = (current_month + relativedelta(months=1) - relativedelta(days=1)).strftime('%Y-%m-%d')
        # Daily-table query covering the whole month.
        params = {
            'tab01': '0',
            'tab02': '2',
            'tab03': '2',
            'tord': '2',
            's_day': first_day,
            'e_day': last_day
        }
        try:
            res = session.get(statics_url, params=params, headers=headers, timeout=10)
            res.encoding = 'euc-kr'
            if res.status_code == 200:
                tbody_match = tbody_re.search(res.text)
                if tbody_match:
                    matches = row_re.findall(tbody_match.group(1))
                    if matches:
                        # Sum the daily rows into the monthly total.
                        monthly_total = sum(safe_float(kwh) for _, kwh in matches)
                        results.append({
                            'plant_id': plant_id,
                            'month': month_str,
                            'generation_kwh': monthly_total
                        })
                        print(f"{month_str}: {monthly_total:.1f}kWh (from {len(matches)} days)")
                    else:
                        print(f" ⚠ No data for {month_str}")
                else:
                    # Previously silent: a page without a table left no trace in the log.
                    print(f" ⚠ No tbody found for {month_str}")
            else:
                # Previously silent: failed months were indistinguishable from skipped ones.
                print(f" ✗ HTTP {res.status_code} for {month_str}")
        except Exception as e:
            print(f" ✗ Error for {month_str}: {e}")
        # Advance to the next month
        current_month += relativedelta(months=1)

    print(f"[Total] Collected {len(results)} monthly records\n")
    return results

343
crawlers/sun_wms.py.backup Normal file
View File

@ -0,0 +1,343 @@
# ==========================================
# crawlers/sun_wms.py - Sun-WMS 크롤러 (6호기)
# ==========================================
import requests
import re
import time
from .base import create_session
def fetch_data(plant_info):
    """
    Collect the current output and today's generation for a Sun-WMS plant.

    Logs in, fetches the real-time page and scrapes the ``cur_power`` and
    ``today_power`` element values out of the returned HTML.

    Args:
        plant_info: dict describing the plant. Reads 'id', 'name',
            'company_name', 'auth' ('payload_id', 'payload_pw') and
            'system' ('login_url', 'data_url').

    Returns:
        list: [{'id', 'name', 'kw', 'today', 'status'}] with one element, or
            [] when the login or the data request fails.
    """
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    company_name = plant_info.get('company_name', '태양과바람')
    plant_name = plant_info.get('name', '6호기')
    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')
    data_url = system.get('data_url', '')
    session = create_session()
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
        'Referer': 'http://tb6.sun-wms.com/public/main/login.php',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }

    # 1. Login
    login_data = {
        'act': 'loginChk',
        'user_id': payload_id,
        'user_pass': payload_pw
    }
    try:
        # Without a timeout a stalled server would block the crawler forever.
        res = session.post(login_url, data=login_data, headers=headers, timeout=10)
        if res.status_code != 200:
            return []
    except Exception as e:
        print(f"❌ {plant_name} 접속 에러: {e}")
        return []

    # 2. Data request
    try:
        # Millisecond timestamp in the query string acts as a cache-buster.
        timestamp = int(time.time() * 1000)
        res = session.get(f"{data_url}?time={timestamp}", headers=headers, timeout=10)
        if res.status_code != 200:
            # An error page has no power fields; parsing it would report a
            # misleading 0 kW / idle reading instead of a failure.
            print(f"❌ {plant_name} 에러: HTTP {res.status_code}")
            return []
        res.encoding = 'euc-kr'
        content = res.text
        match_kw = re.search(r"id=['\"]cur_power['\"].*?value=['\"]([^'\"]+)['\"]", content)
        curr_kw = float(match_kw.group(1)) if match_kw else 0.0
        match_today = re.search(r"id=['\"]today_power['\"].*?value=['\"]([^'\"]+)['\"]", content)
        today_kwh = float(match_today.group(1)) if match_today else 0.0
        status = "🟢 정상" if curr_kw > 0 else "💤 대기"
        return [{
            'id': plant_id,
            'name': f'{company_name} {plant_name}',
            'kw': curr_kw,
            'today': today_kwh,
            'status': status
        }]
    except Exception as e:
        print(f"❌ {plant_name} 에러: {e}")
        return []
def fetch_history_daily(plant_info, start_date, end_date):
    """
    Collect past daily generation data for a Sun-WMS plant, one request per day.

    NOTE(review): the getDailyData.php endpoint is marked as a guess in the
    original code; confirm it exists before relying on this function.

    Args:
        plant_info: dict describing the plant. Reads 'id', 'name',
            'auth' ('payload_id', 'payload_pw') and 'system'
            ('login_url', 'base_url').
        start_date: str, first day (YYYY-MM-DD).
        end_date: str, last day (YYYY-MM-DD), inclusive.

    Returns:
        list: [{'plant_id', 'date': 'YYYY-MM-DD', 'generation_kwh'}, ...];
            empty if login fails.
    """
    from datetime import datetime, timedelta
    from .base import safe_float
    import time

    results = []
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '6호기')
    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')
    base_url = system.get('base_url', '')
    session = create_session()

    print(f"\n{'='*60}")
    print(f"[Sun-WMS Daily] {plant_name} ({start_date} ~ {end_date})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }
    login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw}
    try:
        # Bounded wait so a stalled login cannot hang the whole back-fill.
        res = session.post(login_url, data=login_data, headers=headers, timeout=10)
        if res.status_code == 200:
            print(" ✓ Login successful")
        else:
            print(" ✗ Login failed")
            return results
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    current_date = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')
    while current_date <= end_dt:
        date_str = current_date.strftime('%Y-%m-%d')
        # Daily data endpoint (guessed)
        daily_url = f"{base_url}/public/chart/getDailyData.php?date={date_str}"
        try:
            # Millisecond timestamp acts as a cache-buster.
            cache_buster = int(time.time() * 1000)
            res = session.get(f"{daily_url}&time={cache_buster}", headers=headers, timeout=10)
            res.encoding = 'euc-kr'
            if res.status_code == 200:
                data = res.json()
                # Prefer the 'daily' field, fall back to 'today'.
                daily_kwh = safe_float(data.get('daily', data.get('today', 0)))
                results.append({
                    'plant_id': plant_id,
                    'date': date_str,
                    'generation_kwh': daily_kwh
                })
                print(f" ✓ {date_str}: {daily_kwh}kWh")
            else:
                # Previously silent: a failed day left no trace in the log.
                print(f" ✗ {date_str}: HTTP {res.status_code}")
        except Exception as e:
            print(f" ✗ {date_str}: {e}")
        current_date += timedelta(days=1)

    print(f"[Total] Collected {len(results)} daily records\n")
    return results
def fetch_history_monthly(plant_info, start_month, end_month):
    """
    Collect past monthly generation data for a Sun-WMS plant, one request per month.

    NOTE(review): the getMonthlyData.php endpoint is marked as a guess in the
    original code; confirm it exists before relying on this function.

    Args:
        plant_info: dict describing the plant. Reads 'id', 'name',
            'auth' ('payload_id', 'payload_pw') and 'system'
            ('login_url', 'base_url').
        start_month: str, first month (YYYY-MM).
        end_month: str, last month (YYYY-MM), inclusive.

    Returns:
        list: [{'plant_id', 'month': 'YYYY-MM', 'generation_kwh'}, ...];
            empty if login fails.
    """
    from datetime import datetime
    from dateutil.relativedelta import relativedelta
    from .base import safe_float
    import time

    results = []
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '6호기')
    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')
    base_url = system.get('base_url', '')
    session = create_session()

    print(f"\n{'='*60}")
    print(f"[Sun-WMS Monthly] {plant_name} ({start_month} ~ {end_month})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }
    login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw}
    try:
        # Bounded wait so a stalled login cannot hang the whole back-fill.
        res = session.post(login_url, data=login_data, headers=headers, timeout=10)
        if res.status_code == 200:
            print(" ✓ Login successful")
        else:
            print(" ✗ Login failed")
            return results
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    current_month = datetime.strptime(start_month, '%Y-%m')
    end_month_dt = datetime.strptime(end_month, '%Y-%m')
    while current_month <= end_month_dt:
        month_str = current_month.strftime('%Y-%m')
        # Monthly data endpoint (guessed)
        monthly_url = f"{base_url}/public/chart/getMonthlyData.php?month={month_str}"
        try:
            # Millisecond timestamp acts as a cache-buster.
            cache_buster = int(time.time() * 1000)
            res = session.get(f"{monthly_url}&time={cache_buster}", headers=headers, timeout=10)
            res.encoding = 'euc-kr'
            if res.status_code == 200:
                data = res.json()
                # Prefer the 'monthly' field, fall back to 'month'.
                monthly_kwh = safe_float(data.get('monthly', data.get('month', 0)))
                results.append({
                    'plant_id': plant_id,
                    'month': month_str,
                    'generation_kwh': monthly_kwh
                })
                print(f" ✓ {month_str}: {monthly_kwh}kWh")
            else:
                # Previously silent: a failed month left no trace in the log.
                print(f" ✗ {month_str}: HTTP {res.status_code}")
        except Exception as e:
            print(f" ✗ {month_str}: {e}")
        current_month += relativedelta(months=1)

    print(f"[Total] Collected {len(results)} monthly records\n")
    return results
def fetch_history_hourly(plant_info, start_date, end_date):
    """
    Collect past hourly generation data for a Sun-WMS plant, one request per day.

    NOTE(review): the getHourlyData.php endpoint is marked as a guess in the
    original code; confirm it exists before relying on this function.

    Args:
        plant_info: dict describing the plant. Reads 'id', 'name',
            'auth' ('payload_id', 'payload_pw') and 'system'
            ('login_url', 'base_url').
        start_date: str, first day (YYYY-MM-DD).
        end_date: str, last day (YYYY-MM-DD), inclusive.

    Returns:
        list: [{'plant_id', 'timestamp': 'YYYY-MM-DD HH:00:00',
            'generation_kwh', 'current_kw'}, ...]; empty if login fails.
    """
    from datetime import datetime, timedelta
    from .base import safe_float
    import time

    results = []
    # Extract settings
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '6호기')
    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')
    base_url = system.get('base_url', '')
    session = create_session()

    print(f"\n{'='*60}")
    print(f"[Sun-WMS History] {plant_name} ({start_date} ~ {end_date})")
    print(f"{'='*60}")

    # Login
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
        'Referer': 'http://tb6.sun-wms.com/public/main/login.php',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }
    login_data = {
        'act': 'loginChk',
        'user_id': payload_id,
        'user_pass': payload_pw
    }
    try:
        # Bounded wait so a stalled login cannot hang the whole back-fill.
        res = session.post(login_url, data=login_data, headers=headers, timeout=10)
        if res.status_code != 200:
            print(f" ✗ Login failed")
            return results
        print(f" ✓ Login successful")
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    # Iterate over the date range
    current_date = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')
    while current_date <= end_dt:
        date_str = current_date.strftime('%Y-%m-%d')
        print(f"\n[Processing Date] {date_str}")
        # Hourly data endpoint (guessed)
        hourly_url = f"{base_url}/public/chart/getHourlyData.php?date={date_str}"
        try:
            # Kept separate from the per-record timestamp string below; the
            # original reused one name for both values.
            cache_buster = int(time.time() * 1000)
            res = session.get(f"{hourly_url}&time={cache_buster}", headers=headers, timeout=10)
            res.encoding = 'euc-kr'
            if res.status_code == 200:
                data = res.json()
                # Accept either a bare list or an object wrapping it under 'hourly'.
                hourly_data = data if isinstance(data, list) else data.get('hourly', [])
                if hourly_data:
                    print(f" ✓ Found {len(hourly_data)} hourly records")
                    for item in hourly_data:
                        hour = str(item.get('hour', item.get('time', '00'))).zfill(2)
                        generation_kwh = safe_float(item.get('power', item.get('kwh', 0)))
                        current_kw = safe_float(item.get('kw', 0))
                        results.append({
                            'plant_id': plant_id,
                            'timestamp': f"{date_str} {hour}:00:00",
                            'generation_kwh': generation_kwh,
                            'current_kw': current_kw
                        })
                else:
                    print(f" ⚠ No hourly data for {date_str}")
            else:
                print(f" ✗ HTTP {res.status_code}")
        except Exception as e:
            print(f" ✗ Error: {e}")
        # Next day
        current_date += timedelta(days=1)

    print(f"\n{'='*60}")
    print(f"[Total] Collected {len(results)} hourly records")
    print(f"{'='*60}\n")
    return results

359
crawlers/sun_wms_json.py Normal file
View File

@ -0,0 +1,359 @@
# ==========================================
# crawlers/sun_wms_json.py - Sun-WMS 크롤러 (6호기)
# ==========================================
import requests
import re
import time
from .base import create_session, safe_float
def fetch_data(plant_info):
    """
    Collect the current output and today's generation for a Sun-WMS plant.

    Logs in, fetches the real-time page and scrapes the ``cur_power`` and
    ``today_power`` element values out of the returned HTML.

    Args:
        plant_info: dict describing the plant. Reads 'id', 'name',
            'company_name', 'auth' ('payload_id', 'payload_pw') and
            'system' ('login_url', 'data_url').

    Returns:
        list: [{'id', 'name', 'kw', 'today', 'status'}] with one element, or
            [] when the login or the data request fails.
    """
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    company_name = plant_info.get('company_name', '태양과바람')
    plant_name = plant_info.get('name', '6호기')
    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')
    data_url = system.get('data_url', '')
    session = create_session()
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
        'Referer': 'http://tb6.sun-wms.com/public/main/login.php',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }

    # 1. Login
    login_data = {
        'act': 'loginChk',
        'user_id': payload_id,
        'user_pass': payload_pw
    }
    try:
        # Without a timeout a stalled server would block the crawler forever.
        res = session.post(login_url, data=login_data, headers=headers, timeout=10)
        if res.status_code != 200:
            return []
    except Exception as e:
        print(f"{plant_name} 접속 에러: {e}")
        return []

    # 2. Data request
    try:
        # Millisecond timestamp in the query string acts as a cache-buster.
        timestamp = int(time.time() * 1000)
        res = session.get(f"{data_url}?time={timestamp}", headers=headers, timeout=10)
        if res.status_code != 200:
            # An error page has no power fields; parsing it would report a
            # misleading 0 kW / idle reading instead of a failure.
            print(f"{plant_name} 에러: HTTP {res.status_code}")
            return []
        res.encoding = 'euc-kr'
        content = res.text
        match_kw = re.search(r"id=['\"]cur_power['\"].*?value=['\"]([^'\"]+)['\"]", content)
        curr_kw = float(match_kw.group(1)) if match_kw else 0.0
        match_today = re.search(r"id=['\"]today_power['\"].*?value=['\"]([^'\"]+)['\"]", content)
        today_kwh = float(match_today.group(1)) if match_today else 0.0
        status = "🟢 정상" if curr_kw > 0 else "💤 대기"
        return [{
            'id': plant_id,
            'name': f'{company_name} {plant_name}',
            'kw': curr_kw,
            'today': today_kwh,
            'status': status
        }]
    except Exception as e:
        print(f"{plant_name} 에러: {e}")
        return []
def fetch_history_hourly(plant_info, start_date, end_date):
    """
    Collect past hourly generation data for a Sun-WMS plant, one request per day.

    Endpoint: /public/statics/statics.php, parsed here as JSON.
    Query: tab01=0&tab02=1&tab03=2&tord=1&s_day=YYYY-MM-DD

    Args:
        plant_info: dict describing the plant. Reads 'id', 'name',
            'auth' ('payload_id', 'payload_pw') and 'system'
            ('login_url', 'base_url', 'statics_url').
        start_date: str, first day (YYYY-MM-DD).
        end_date: str, last day (YYYY-MM-DD), inclusive.

    Returns:
        list: [{'plant_id', 'timestamp': 'YYYY-MM-DD HH:00:00',
            'generation_kwh', 'current_kw'}, ...]; empty if login fails.
    """
    from datetime import datetime, timedelta

    results = []
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '6호기')
    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')
    # Derive base_url from the login URL when it is not configured.
    base_url = system.get('base_url', '')
    if not base_url and 'http' in login_url:
        base_url = login_url.split('/public')[0]
    statics_url = system.get('statics_url', f"{base_url}/public/statics/statics.php")
    session = create_session()

    print(f"\n{'='*60}")
    print(f"[Sun-WMS History] {plant_name} ({start_date} ~ {end_date})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }
    login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw}
    try:
        # Bounded wait so a stalled login cannot hang the whole back-fill.
        res = session.post(login_url, data=login_data, headers=headers, timeout=10)
        if res.status_code == 200:
            print(" ✓ Login successful")
        else:
            print(" ✗ Login failed")
            return results
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    # Iterate over days
    current_date = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')
    while current_date <= end_dt:
        date_str = current_date.strftime('%Y-%m-%d')
        params = {
            'tab01': '0',
            'tab02': '1',
            'tab03': '2',
            'tord': '1',
            's_day': date_str
        }
        try:
            res = session.get(statics_url, params=params, headers=headers, timeout=10)
            res.encoding = 'euc-kr'
            if res.status_code == 200:
                data = res.json()
                # Rows are looked up under 'data', falling back to 'list'.
                hourly_data = data.get('data', []) or data.get('list', [])
                if isinstance(hourly_data, list) and hourly_data:
                    print(f" ✓ Found {len(hourly_data)} hourly records")
                    for item in hourly_data:
                        hour = str(item.get('hour', item.get('time', '00'))).zfill(2)
                        generation_kwh = safe_float(item.get('generation', item.get('kwh', 0)))
                        current_kw = safe_float(item.get('power', item.get('kw', 0)))
                        results.append({
                            'plant_id': plant_id,
                            'timestamp': f"{date_str} {hour}:00:00",
                            'generation_kwh': generation_kwh,
                            'current_kw': current_kw
                        })
                else:
                    print(f" ⚠ No data for {date_str}")
            else:
                print(f" ✗ HTTP {res.status_code}")
        except Exception as e:
            print(f" ✗ Error: {e}")
        current_date += timedelta(days=1)

    print(f"\n{'='*60}")
    print(f"[Total] Collected {len(results)} hourly records")
    print(f"{'='*60}\n")
    return results
def fetch_history_daily(plant_info, start_date, end_date):
    """
    Collect past daily generation data for a Sun-WMS plant in a single ranged request.

    Endpoint: /public/statics/statics.php, parsed here as JSON.
    Query: tab01=0&tab02=2&tab03=2&tord=2&s_day=YYYY-MM-DD&e_day=YYYY-MM-DD

    Args:
        plant_info: dict describing the plant. Reads 'id', 'name',
            'auth' ('payload_id', 'payload_pw') and 'system'
            ('login_url', 'base_url', 'statics_url').
        start_date: str, first day (YYYY-MM-DD); sent to the server as-is.
        end_date: str, last day (YYYY-MM-DD); sent to the server as-is.

    Returns:
        list: [{'plant_id', 'date', 'generation_kwh', 'current_kw'}, ...];
            empty if login or the request fails.
    """
    results = []
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '6호기')
    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')
    # Derive base_url from the login URL when it is not configured.
    base_url = system.get('base_url', '')
    if not base_url and 'http' in login_url:
        base_url = login_url.split('/public')[0]
    statics_url = system.get('statics_url', f"{base_url}/public/statics/statics.php")
    session = create_session()

    print(f"\n{'='*60}")
    print(f"[Sun-WMS Daily] {plant_name} ({start_date} ~ {end_date})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }
    login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw}
    try:
        # Bounded wait so a stalled login cannot hang the crawler.
        res = session.post(login_url, data=login_data, headers=headers, timeout=10)
        if res.status_code == 200:
            print(" ✓ Login successful")
        else:
            print(" ✗ Login failed")
            return results
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    params = {
        'tab01': '0',
        'tab02': '2',
        'tab03': '2',
        'tord': '2',
        's_day': start_date,
        'e_day': end_date
    }
    try:
        res = session.get(statics_url, params=params, headers=headers, timeout=10)
        res.encoding = 'euc-kr'
        if res.status_code == 200:
            data = res.json()
            # Rows are looked up under 'data', falling back to 'list'.
            daily_data = data.get('data', []) or data.get('list', [])
            if isinstance(daily_data, list) and daily_data:
                for item in daily_data:
                    date_str = item.get('date', item.get('day', ''))
                    generation_kwh = safe_float(item.get('generation', item.get('kwh', 0)))
                    current_kw = safe_float(item.get('power', item.get('kw', 0)))
                    results.append({
                        'plant_id': plant_id,
                        'date': date_str,
                        'generation_kwh': generation_kwh,
                        'current_kw': current_kw
                    })
                    print(f"{date_str}: {generation_kwh:.2f}kWh")
            else:
                # Previously silent: an empty result was indistinguishable from a failure.
                print(f" ⚠ No data for {start_date} ~ {end_date}")
        else:
            # Previously silent: HTTP failures left no trace in the log.
            print(f" ✗ HTTP {res.status_code}")
    except Exception as e:
        print(f" ✗ Error: {e}")

    print(f"[Total] Collected {len(results)} daily records\n")
    return results
def fetch_history_monthly(plant_info, start_month, end_month):
    """
    Collect monthly historical generation data for a Sun-WMS plant.

    Logs in with the plant's credentials and then issues one GET request to
    the statistics endpoint (``/public/statics/statics.php``) with the query
    ``tab01=0&tab02=3&tab03=2&tord=3&s_day=YYYY-MM&e_day=YYYY-MM``.

    Args:
        plant_info: dict describing the plant. Reads ``id``, ``name``,
            ``auth`` (``payload_id`` / ``payload_pw``) and ``system``
            (``login_url`` plus optional ``base_url`` / ``statics_url``).
        start_month: first month of the range, sent as ``s_day``.
        end_month: last month of the range, sent as ``e_day``.

    Returns:
        list of ``{'plant_id', 'month', 'generation_kwh'}`` dicts. The list
        is empty when login fails or the request/parsing raises.
    """
    results = []
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '6호기')
    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')

    # Derive base_url from the login URL when it is not configured.
    base_url = system.get('base_url', '')
    if not base_url and 'http' in login_url:
        base_url = login_url.split('/public')[0]
    statics_url = system.get('statics_url', f"{base_url}/public/statics/statics.php")

    session = create_session()
    print(f"\n{'='*60}")
    print(f"[Sun-WMS Monthly] {plant_name} ({start_month} ~ {end_month})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }
    login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw}
    try:
        # A timeout keeps a stalled login from blocking the crawler forever.
        res = session.post(login_url, data=login_data, headers=headers, timeout=10)
        if res.status_code != 200:
            print(" ✗ Login failed")
            return results
        print(" ✓ Login successful")
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    # Confirmed monthly statistics endpoint parameters.
    params = {
        'tab01': '0',
        'tab02': '3',
        'tab03': '2',
        'tord': '3',
        's_day': start_month,
        'e_day': end_month
    }
    try:
        res = session.get(statics_url, params=params, headers=headers, timeout=10)
        res.encoding = 'euc-kr'
        if res.status_code == 200:
            data = res.json()
            # The payload is read from 'data' first, then from 'list'.
            monthly_data = data.get('data', []) or data.get('list', [])
            if isinstance(monthly_data, list):
                for item in monthly_data:
                    raw_month = item.get('month', item.get('date', ''))
                    # Keep only the YYYY-MM prefix; tolerate None/non-string values
                    # instead of aborting the whole loop.
                    month_key = str(raw_month or '')[:7]
                    generation_kwh = safe_float(
                        item.get('generation', item.get('kwh', item.get('monthTotal', 0)))
                    )
                    results.append({
                        'plant_id': plant_id,
                        'month': month_key,
                        'generation_kwh': generation_kwh
                    })
                    print(f"{month_key}: {generation_kwh:.1f}kWh")
        else:
            # Surface non-200 responses instead of silently returning nothing.
            print(f" ✗ HTTP {res.status_code}")
    except Exception as e:
        print(f" ✗ Error: {e}")

    print(f"[Total] Collected {len(results)} monthly records\n")
    return results

343
crawlers/sun_wms_old.py Normal file
View File

@ -0,0 +1,343 @@
# ==========================================
# crawlers/sun_wms_old.py - Sun-WMS 크롤러 (6호기)
# ==========================================
import requests
import re
import time
from .base import create_session
def fetch_data(plant_info):
    """
    Collect the current readings of a Sun-WMS plant.

    Logs in, downloads the real-time page (decoded as EUC-KR) and extracts
    the ``value`` attributes of the elements with ids ``cur_power`` and
    ``today_power``.

    Args:
        plant_info: dict describing the plant. Reads ``id``, ``name``,
            ``company_name``, ``auth`` (``payload_id`` / ``payload_pw``) and
            ``system`` (``login_url`` / ``data_url``).

    Returns:
        A one-element list ``[{'id', 'name', 'kw', 'today', 'status'}]``, or
        an empty list when login fails or any request/parse error occurs.
    """
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    company_name = plant_info.get('company_name', '태양과바람')
    plant_name = plant_info.get('name', '6호기')
    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')
    data_url = system.get('data_url', '')

    # Same limit the history fetchers in this file use for their GETs.
    request_timeout = 10

    session = create_session()
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
        'Referer': 'http://tb6.sun-wms.com/public/main/login.php',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }

    # 1. Login
    login_data = {
        'act': 'loginChk',
        'user_id': payload_id,
        'user_pass': payload_pw
    }
    try:
        # Without a timeout a stalled server would hang the whole crawl run.
        res = session.post(login_url, data=login_data, headers=headers,
                           timeout=request_timeout)
        if res.status_code != 200:
            return []
    except Exception as e:
        print(f"{plant_name} 접속 에러: {e}")
        return []

    # 2. Data request
    try:
        # Millisecond timestamp appended to the URL as a `time` parameter.
        cache_buster = int(time.time() * 1000)
        res = session.get(f"{data_url}?time={cache_buster}", headers=headers,
                          timeout=request_timeout)
        res.encoding = 'euc-kr'
        content = res.text

        match_kw = re.search(r"id=['\"]cur_power['\"].*?value=['\"]([^'\"]+)['\"]", content)
        curr_kw = float(match_kw.group(1)) if match_kw else 0.0
        match_today = re.search(r"id=['\"]today_power['\"].*?value=['\"]([^'\"]+)['\"]", content)
        today_kwh = float(match_today.group(1)) if match_today else 0.0

        status = "🟢 정상" if curr_kw > 0 else "💤 대기"
        return [{
            'id': plant_id,
            'name': f'{company_name} {plant_name}',
            'kw': curr_kw,
            'today': today_kwh,
            'status': status
        }]
    except Exception as e:
        print(f"{plant_name} 에러: {e}")
        return []
def fetch_history_daily(plant_info, start_date, end_date):
    """
    Collect daily historical generation data for a Sun-WMS plant.

    Logs in once, then requests one day at a time from
    ``{base_url}/public/chart/getDailyData.php`` (the original code marks
    this endpoint as a guess) for every date in the inclusive range.

    Args:
        plant_info: dict describing the plant. Reads ``id``, ``name``,
            ``auth`` (``payload_id`` / ``payload_pw``) and ``system``
            (``login_url`` / ``base_url``).
        start_date: first day, ``YYYY-MM-DD``.
        end_date: last day, ``YYYY-MM-DD``.

    Returns:
        list of ``{'plant_id', 'date', 'generation_kwh'}`` dicts; empty when
        login fails.

    Raises:
        ValueError: if ``start_date`` / ``end_date`` are not ``YYYY-MM-DD``.
    """
    from datetime import datetime, timedelta
    from .base import safe_float

    results = []
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '6호기')
    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')
    base_url = system.get('base_url', '')

    session = create_session()
    print(f"\n{'='*60}")
    print(f"[Sun-WMS Daily] {plant_name} ({start_date} ~ {end_date})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }
    login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw}
    try:
        # Same timeout as the data requests, so a stalled login cannot hang.
        res = session.post(login_url, data=login_data, headers=headers, timeout=10)
        if res.status_code != 200:
            print(" ✗ Login failed")
            return results
        print(" ✓ Login successful")
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    first_day = datetime.strptime(start_date, '%Y-%m-%d')
    last_day = datetime.strptime(end_date, '%Y-%m-%d')
    # An end date before the start date yields an empty range.
    for offset in range((last_day - first_day).days + 1):
        date_str = (first_day + timedelta(days=offset)).strftime('%Y-%m-%d')
        # Daily data endpoint (guessed, per the original author's note).
        daily_url = f"{base_url}/public/chart/getDailyData.php?date={date_str}"
        try:
            cache_buster = int(time.time() * 1000)
            res = session.get(f"{daily_url}&time={cache_buster}", headers=headers, timeout=10)
            res.encoding = 'euc-kr'
            if res.status_code == 200:
                data = res.json()
                daily_kwh = safe_float(data.get('daily', data.get('today', 0)))
                results.append({
                    'plant_id': plant_id,
                    'date': date_str,
                    'generation_kwh': daily_kwh
                })
                print(f"{date_str}: {daily_kwh}kWh")
            else:
                # Report skipped days instead of dropping them silently.
                print(f"{date_str}: HTTP {res.status_code}")
        except Exception as e:
            print(f"{date_str}: {e}")

    print(f"[Total] Collected {len(results)} daily records\n")
    return results
def fetch_history_monthly(plant_info, start_month, end_month):
    """
    Collect monthly historical generation data for a Sun-WMS plant.

    Logs in once, then requests one month at a time from
    ``{base_url}/public/chart/getMonthlyData.php`` (the original code marks
    this endpoint as a guess) for every month in the inclusive range.

    Args:
        plant_info: dict describing the plant. Reads ``id``, ``name``,
            ``auth`` (``payload_id`` / ``payload_pw``) and ``system``
            (``login_url`` / ``base_url``).
        start_month: first month, ``YYYY-MM``.
        end_month: last month, ``YYYY-MM``.

    Returns:
        list of ``{'plant_id', 'month', 'generation_kwh'}`` dicts; empty when
        login fails.

    Raises:
        ValueError: if ``start_month`` / ``end_month`` are not ``YYYY-MM``.
    """
    from datetime import datetime
    from dateutil.relativedelta import relativedelta
    from .base import safe_float

    results = []
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '6호기')
    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')
    base_url = system.get('base_url', '')

    session = create_session()
    print(f"\n{'='*60}")
    print(f"[Sun-WMS Monthly] {plant_name} ({start_month} ~ {end_month})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }
    login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw}
    try:
        # Same timeout as the data requests, so a stalled login cannot hang.
        res = session.post(login_url, data=login_data, headers=headers, timeout=10)
        if res.status_code != 200:
            print(" ✗ Login failed")
            return results
        print(" ✓ Login successful")
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    cursor = datetime.strptime(start_month, '%Y-%m')
    final_month = datetime.strptime(end_month, '%Y-%m')
    while cursor <= final_month:
        month_str = cursor.strftime('%Y-%m')
        # Monthly data endpoint (guessed, per the original author's note).
        monthly_url = f"{base_url}/public/chart/getMonthlyData.php?month={month_str}"
        try:
            cache_buster = int(time.time() * 1000)
            res = session.get(f"{monthly_url}&time={cache_buster}", headers=headers, timeout=10)
            res.encoding = 'euc-kr'
            if res.status_code == 200:
                data = res.json()
                monthly_kwh = safe_float(data.get('monthly', data.get('month', 0)))
                results.append({
                    'plant_id': plant_id,
                    'month': month_str,
                    'generation_kwh': monthly_kwh
                })
                print(f"{month_str}: {monthly_kwh}kWh")
            else:
                # Report skipped months instead of dropping them silently.
                print(f"{month_str}: HTTP {res.status_code}")
        except Exception as e:
            print(f"{month_str}: {e}")
        cursor += relativedelta(months=1)

    print(f"[Total] Collected {len(results)} monthly records\n")
    return results
def fetch_history_hourly(plant_info, start_date, end_date):
    """
    Collect hourly historical generation data for a Sun-WMS plant.

    Logs in once, then requests one day at a time from
    ``{base_url}/public/chart/getHourlyData.php`` (the original code marks
    this endpoint as a guess) for every date in the inclusive range.

    Args:
        plant_info: dict describing the plant. Reads ``id``, ``name``,
            ``auth`` (``payload_id`` / ``payload_pw``) and ``system``
            (``login_url`` / ``base_url``).
        start_date: first day, ``YYYY-MM-DD``.
        end_date: last day, ``YYYY-MM-DD``.

    Returns:
        list of ``{'plant_id', 'timestamp', 'generation_kwh', 'current_kw'}``
        dicts, where ``timestamp`` is ``"YYYY-MM-DD HH:00:00"``; empty when
        login fails.

    Raises:
        ValueError: if ``start_date`` / ``end_date`` are not ``YYYY-MM-DD``.
    """
    from datetime import datetime, timedelta
    from .base import safe_float

    results = []

    # Plant settings
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '6호기')
    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')
    base_url = system.get('base_url', '')

    session = create_session()
    print(f"\n{'='*60}")
    print(f"[Sun-WMS History] {plant_name} ({start_date} ~ {end_date})")
    print(f"{'='*60}")

    # Login
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
        'Referer': 'http://tb6.sun-wms.com/public/main/login.php',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }
    login_data = {
        'act': 'loginChk',
        'user_id': payload_id,
        'user_pass': payload_pw
    }
    try:
        # Same timeout as the data requests, so a stalled login cannot hang.
        res = session.post(login_url, data=login_data, headers=headers, timeout=10)
        if res.status_code != 200:
            print(f" ✗ Login failed")
            return results
        print(f" ✓ Login successful")
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    # Walk the date range one day at a time.
    first_day = datetime.strptime(start_date, '%Y-%m-%d')
    last_day = datetime.strptime(end_date, '%Y-%m-%d')
    for offset in range((last_day - first_day).days + 1):
        date_str = (first_day + timedelta(days=offset)).strftime('%Y-%m-%d')
        print(f"\n[Processing Date] {date_str}")
        # Hourly data endpoint (guessed, per the original author's note).
        hourly_url = f"{base_url}/public/chart/getHourlyData.php?date={date_str}"
        try:
            # Kept separate from the per-record timestamp built below.
            cache_buster = int(time.time() * 1000)
            res = session.get(f"{hourly_url}&time={cache_buster}", headers=headers, timeout=10)
            res.encoding = 'euc-kr'
            if res.status_code == 200:
                data = res.json()
                # The response is either a bare list or a dict with an 'hourly' list.
                hourly_data = data if isinstance(data, list) else data.get('hourly', [])
                if hourly_data and len(hourly_data) > 0:
                    print(f" ✓ Found {len(hourly_data)} hourly records")
                    for item in hourly_data:
                        hour = str(item.get('hour', item.get('time', '00'))).zfill(2)
                        generation_kwh = safe_float(item.get('power', item.get('kwh', 0)))
                        current_kw = safe_float(item.get('kw', 0))
                        record_ts = f"{date_str} {hour}:00:00"
                        results.append({
                            'plant_id': plant_id,
                            'timestamp': record_ts,
                            'generation_kwh': generation_kwh,
                            'current_kw': current_kw
                        })
                else:
                    print(f" ⚠ No hourly data for {date_str}")
            else:
                print(f" ✗ HTTP {res.status_code}")
        except Exception as e:
            print(f" ✗ Error: {e}")

    print(f"\n{'='*60}")
    print(f"[Total] Collected {len(results)} hourly records")
    print(f"{'='*60}\n")
    return results

200
daily_summary.py Normal file
View File

@ -0,0 +1,200 @@
# ==========================================
# daily_summary.py - 일일 발전 통계 집계
# ==========================================
# solar_logs 데이터를 집계하여 daily_stats 테이블에 저장
from datetime import datetime, timedelta
try:
from dotenv import load_dotenv
load_dotenv()
except ImportError:
pass
import pandas as pd
from database import get_supabase_client
def get_plant_capacities(client) -> dict:
    """Look up each plant's capacity in the ``plants`` table.

    Args:
        client: Supabase client exposing ``table(...).select(...).execute()``.

    Returns:
        dict mapping plant id to capacity. A missing or NULL capacity falls
        back to 99.0. An empty dict is returned if the query fails.
    """
    default_capacity = 99.0
    try:
        result = client.table("plants").select("id, capacity").execute()
        capacities = {}
        for row in result.data:
            capacity = row.get('capacity')
            # A selected column is present even when NULL, so a plain
            # .get(key, default) would let None through to the caller.
            capacities[row['id']] = default_capacity if capacity is None else capacity
        return capacities
    except Exception as e:
        print(f" ⚠️ 용량 조회 실패: {e}")
        return {}
def calculate_daily_stats(date_str: str = None):
    """
    Aggregate one day's generation statistics into ``daily_stats``.

    Reads that day's rows from ``solar_logs`` and computes, per plant, the
    total generation (the ``today_kwh`` of the last log), the peak
    ``current_kw`` and the utilisation hours (generation / capacity). The
    results are upserted on ``plant_id,date``.

    Args:
        date_str: day to aggregate, ``YYYY-MM-DD``. Defaults to today.

    Returns:
        bool: True on success; False when the client is unavailable, no logs
        exist for the day, or a query/upsert fails.
    """
    if date_str is None:
        date_str = datetime.now().strftime('%Y-%m-%d')

    print(f"\n📊 [일일 통계 집계] {date_str}")
    print("-" * 60)

    client = get_supabase_client()
    if not client:
        print("❌ Supabase 연결 실패")
        return False

    # 1. Capacity lookup
    capacities = get_plant_capacities(client)

    # 2. Logs of the requested day
    # NOTE(review): these bounds carry no UTC offset, while save_to_supabase
    # writes created_at with a +09:00 offset. If the column is timestamptz
    # and the database session runs in UTC, this window is the UTC day, not
    # the KST day -- confirm against the schema.
    start_dt = f"{date_str}T00:00:00"
    end_dt = f"{date_str}T23:59:59"
    try:
        result = (
            client.table("solar_logs")
            .select("plant_id, current_kw, today_kwh, created_at")
            .gte("created_at", start_dt)
            .lte("created_at", end_dt)
            .order("created_at", desc=False)
            .execute()
        )
        if not result.data:
            print(" ⚠️ 해당 날짜의 로그가 없습니다.")
            return False
        df = pd.DataFrame(result.data)
    except Exception as e:
        print(f" ❌ 로그 조회 실패: {e}")
        return False

    def _as_float(value):
        """Turn a pandas/numpy scalar into a plain float; NULL/NaN becomes 0.0."""
        return 0.0 if pd.isna(value) else float(value)

    # 3. Per-plant statistics
    stats_list = []
    for plant_id, group in df.groupby('plant_id'):
        # Rows are ordered by created_at, so the last one is the latest reading.
        # Plain floats keep the upsert payload JSON-serialisable.
        total_generation = _as_float(group['today_kwh'].iloc[-1])
        peak_kw = _as_float(group['current_kw'].max())

        # Utilisation hours = generation / capacity
        capacity = capacities.get(plant_id)
        if capacity is None:
            # Unknown or NULL capacity: fall back instead of crashing on `None > 0`.
            capacity = 99.0
        generation_hours = round(total_generation / capacity, 2) if capacity > 0 else 0

        stats_list.append({
            'plant_id': plant_id,
            'date': date_str,
            'total_generation': round(total_generation, 2),
            'peak_kw': round(peak_kw, 2),
            'generation_hours': generation_hours
        })
        print(f" {plant_id}: {total_generation:.1f}kWh ({generation_hours:.1f}시간, 최대 {peak_kw:.1f}kW)")

    # 4. Upsert into daily_stats
    if stats_list:
        try:
            client.table("daily_stats").upsert(
                stats_list,
                on_conflict="plant_id,date"
            ).execute()
            print("-" * 60)
            print(f"{len(stats_list)}개 발전소 통계 저장 완료")
        except Exception as e:
            print(f" ❌ 저장 실패: {e}")
            return False
    return True
def calculate_monthly_stats(target_month: str):
    """
    Aggregate one month's generation statistics into ``monthly_stats``.

    For every plant in ``plants``, sums ``total_generation`` over that
    month's ``daily_stats`` rows and upserts the total on
    ``plant_id, month``. Plants without daily rows in the month are skipped.

    Args:
        target_month: month to aggregate, ``YYYY-MM``.

    Returns:
        bool: True on success; False when the client is unavailable, the
        month string cannot be parsed, or a query/upsert fails.
    """
    import calendar  # local import: only this function needs it

    print(f"\n📅 [월간 통계 집계] {target_month}")
    print("-" * 60)

    client = get_supabase_client()
    if not client:
        return False

    try:
        # Use the month's real last day: a fixed "-31" bound is not a valid
        # date for shorter months (e.g. 2026-02-31).
        month_dt = datetime.strptime(target_month, '%Y-%m')
        last_day = calendar.monthrange(month_dt.year, month_dt.month)[1]
        month_prefix = f"{month_dt.year:04d}-{month_dt.month:02d}"
        month_start = f"{month_prefix}-01"
        month_end = f"{month_prefix}-{last_day:02d}"

        # 1. All plant ids
        plants_res = client.table("plants").select("id").execute()
        plant_ids = [p['id'] for p in plants_res.data]

        updated_count = 0
        for pid in plant_ids:
            # 2. Daily rows of the month
            d_res = (
                client.table("daily_stats").select("total_generation")
                .eq("plant_id", pid)
                .gte("date", month_start)
                .lte("date", month_end)
                .execute()
            )
            if not d_res.data:
                continue
            # NULL totals count as 0.
            total_gen = sum(r.get('total_generation', 0) or 0 for r in d_res.data)

            # 3. Monthly upsert
            client.table("monthly_stats").upsert({
                "plant_id": pid,
                "month": target_month,
                "total_generation": round(total_gen, 2),
                "updated_at": datetime.now().isoformat()
            }, on_conflict="plant_id, month").execute()
            print(f" {pid}: {total_gen:.1f}kWh (Month Total)")
            updated_count += 1

        print("-" * 60)
        print(f"{updated_count}개 발전소 월간 통계 갱신 완료")
        return True
    except Exception as e:
        print(f" ❌ 월간 집계 실패: {e}")
        return False
if __name__ == "__main__":
    # Usage: python daily_summary.py [YYYY-MM-DD]
    import sys
    from datetime import timedelta

    cli_args = sys.argv[1:]
    if cli_args:
        target_date = cli_args[0]
    else:
        # No argument: default to yesterday (scenario: run at dawn to close
        # out the previous day's data).
        target_date = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
        print(f" 날짜 미지정 -> 어제({target_date}) 기준으로 집계합니다.")

    # 1. Daily aggregation
    daily_ok = calculate_daily_stats(target_date)

    # 2. When the target date is the last day of its month, also run the
    #    monthly aggregation.
    if daily_ok:
        try:
            run_day = datetime.strptime(target_date, '%Y-%m-%d')
            import calendar
            days_in_month = calendar.monthrange(run_day.year, run_day.month)[1]
            if run_day.day == days_in_month:
                target_month = run_day.strftime('%Y-%m')
                print(f"\n🔔 월말({target_date}) 감지 -> {target_month} 월간 집계 실행")
                calculate_monthly_stats(target_month)
        except Exception as e:
            print(f"⚠️ 월간 집계 트리거 오류: {e}")

313
database.py Normal file
View File

@ -0,0 +1,313 @@
# ==========================================
# database.py - Supabase 연동
# ==========================================
import os
from datetime import datetime
# Load the Supabase settings from environment variables (empty string when unset)
SUPABASE_URL = os.getenv('SUPABASE_URL', '')
SUPABASE_KEY = os.getenv('SUPABASE_KEY', '')
# Debug aid, runs at import time: shows at most the first 15 characters of the URL
print(f"DEBUG: SUPABASE_URL prefix: {SUPABASE_URL[:15] if SUPABASE_URL else 'None'}")
# Client instance cached by get_supabase_client(); None until first created
_supabase_client = None
def get_supabase_client():
    """Return the shared Supabase client, creating it on first use.

    Returns:
        The cached client, or None when SUPABASE_URL / SUPABASE_KEY are not
        set, the ``supabase`` package is missing, or client creation fails.
        A failed attempt is not cached, so a later call tries again.
    """
    global _supabase_client

    # Fast path: already created.
    if _supabase_client is not None:
        return _supabase_client

    if not (SUPABASE_URL and SUPABASE_KEY):
        print("⚠️ SUPABASE_URL 또는 SUPABASE_KEY가 설정되지 않았습니다.")
        print(" .env 파일을 확인하거나 환경 변수를 설정하세요.")
        return None

    try:
        # Imported lazily so a missing package is reported instead of
        # breaking the module import.
        from supabase import create_client
        _supabase_client = create_client(SUPABASE_URL, SUPABASE_KEY)
        print("✅ Supabase 연결 성공")
    except ImportError:
        print("⚠️ supabase 패키지가 설치되지 않았습니다.")
        print(" pip install supabase 실행하세요.")
        return None
    except Exception as e:
        print(f"⚠️ Supabase 연결 실패: {e}")
        return None

    return _supabase_client
def save_to_supabase(data_list):
    """
    Store collected readings in ``solar_logs`` and refresh ``daily_stats``.

    Every item with an ``id`` becomes one ``solar_logs`` row stamped with the
    current KST time. The same items are then upserted into ``daily_stats``
    for today's KST date, so the latest ``today`` value overwrites the stored
    daily total. A failed ``daily_stats`` upsert is reported but does not
    change the return value.

    Args:
        data_list: list of dicts such as
            ``{'id': 'nrems-01', 'name': '...', 'kw': 10.5, 'today': 100.0,
            'status': '...'}``.

    Returns:
        bool: True when the ``solar_logs`` insert succeeded; False when there
        is nothing to save, no client is available, or the insert fails.
    """
    if not data_list:
        print("[DB] 저장할 데이터가 없습니다.")
        return False

    client = get_supabase_client()
    if client is None:
        print("[DB 저장 생략] Supabase 연결 없음")
        return False

    from datetime import timezone, timedelta

    def _number(value):
        """Convert a reading to float; a missing (None) reading counts as 0."""
        return 0.0 if value is None else float(value)

    try:
        # One KST timestamp for the whole batch, so every row of a run shares
        # the same instant and the daily date cannot straddle midnight.
        now_kst = datetime.now(timezone(timedelta(hours=9)))
        kst_now = now_kst.isoformat()
        kst_date_str = now_kst.strftime("%Y-%m-%d")

        records = []
        daily_records = []
        for item in data_list:
            plant_id = item.get('id', '')
            # Skip items without an id.
            if not plant_id:
                print(f" ⚠️ '{item.get('name', 'Unknown')}' ID 없음, 건너뜀")
                continue

            today_val = _number(item.get('today', 0))
            records.append({
                'plant_id': plant_id,
                'current_kw': _number(item.get('kw', 0)),
                'today_kwh': today_val,
                'status': item.get('status', ''),
                'created_at': kst_now  # stored in Korean time
            })
            # The upsert overwrites the stored value with the latest reading,
            # including 0 -- the original code notes this as a known trade-off.
            daily_records.append({
                "plant_id": plant_id,
                "date": kst_date_str,
                "total_generation": today_val,
                "created_at": kst_now,
                "updated_at": kst_now
            })

        if not records:
            print("[DB] 저장할 유효한 레코드가 없습니다.")
            return False

        # Bulk insert into solar_logs
        client.table("solar_logs").insert(records).execute()
        print(f"✅ [DB] Supabase 저장 완료: {len(records)}건 (solar_logs)")

        # Upsert daily_stats; needs a unique constraint on (plant_id, date).
        try:
            client.table("daily_stats").upsert(
                daily_records, on_conflict="plant_id, date"
            ).execute()
            print(f"✅ [DB] daily_stats 업데이트 완료: {len(daily_records)}")
        except Exception as e:
            print(f"⚠️ [DB] daily_stats 업데이트 실패: {e}")

        for r in records:
            print(f"{r['plant_id']}: {r['current_kw']} kW / {r['today_kwh']} kWh")
        return True
    except Exception as e:
        print(f"❌ [DB] Supabase 저장 실패: {e}")
        return False
def save_to_console(data_list):
    """Print the collected readings as a table with a totals row.

    Args:
        data_list: list of dicts with ``name``, ``kw``, ``today`` and
            ``status``. Missing keys print as 'N/A', 0, 0 and ''.

    Returns:
        None. With no data only a warning line is printed.
    """
    if not data_list:
        print("⚠️ 출력할 데이터가 없습니다.")
        return

    heavy_rule = "=" * 75
    light_rule = "-" * 75

    print("\n" + heavy_rule)
    print("📊 [실시간 통합 현황판]")
    print(heavy_rule)
    print(f"{'발전소명':<20} | {'현재출력(kW)':>12} | {'금일발전(kWh)':>12} | {'상태'}")
    print(light_rule)

    sum_kw = 0
    sum_today = 0
    for row in data_list:
        label = row.get('name', 'N/A')
        power = row.get('kw', 0)
        energy = row.get('today', 0)
        state = row.get('status', '')
        sum_kw += power
        sum_today += energy
        print(f"{label:<20} | {power:>12.2f} | {energy:>12.2f} | {state}")

    print(light_rule)
    print(f"{'합계':<20} | {sum_kw:>12.2f} | {sum_today:>12.2f} |")
    print(heavy_rule)
def save_history(data_list, data_type='hourly'):
    """
    Store historical data (hourly, daily or monthly).

    * ``'hourly'``: inserts into ``solar_logs``. Timestamps without an offset
      are taken as KST; records in the future, without a timestamp, or with
      an unparseable timestamp are skipped.
    * ``'daily'``: upserts into ``daily_stats`` on ``plant_id, date`` and then
      recomputes ``monthly_stats`` for every (plant, month) that was touched.
    * ``'monthly'``: upserts into ``monthly_stats`` on ``plant_id, month``.

    Args:
        data_list: list of record dicts as produced by the crawlers'
            ``fetch_history_*`` functions.
        data_type: ``'hourly'``, ``'daily'`` or ``'monthly'``.

    Returns:
        bool: True when records were written; False when there is nothing to
        write, no client is available, or a database call fails.
    """
    if not data_list:
        return False

    client = get_supabase_client()
    if client is None:
        return False

    import calendar
    from datetime import timezone, timedelta

    try:
        table_name = ""
        records = []

        if data_type == 'hourly':
            table_name = "solar_logs"
            kst = timezone(timedelta(hours=9))
            now_kst = datetime.now(kst)
            for item in data_list:
                # Hourly records carry a 'timestamp' key ("YYYY-MM-DD HH:MM:SS").
                ts = item.get('timestamp')
                if not ts:
                    continue
                try:
                    dt_ts = datetime.fromisoformat(ts.replace(' ', 'T'))
                except ValueError:
                    # Skip the record; continuing would reuse the previous
                    # item's parsed time or hit an undefined name.
                    print(f" ⚠️ [History] Skipping record with unparseable timestamp: {ts!r}")
                    continue
                if dt_ts.tzinfo is None:
                    # Input without an offset is taken as local (KST) time.
                    dt_ts = dt_ts.replace(tzinfo=kst)
                if dt_ts > now_kst:
                    continue  # skip future data
                records.append({
                    'plant_id': item['plant_id'],
                    # Send the offset explicitly so the server does not assume UTC.
                    'created_at': dt_ts.isoformat(),
                    'current_kw': float(item.get('current_kw', 0) or item.get('generation_kwh', 0)),
                    'today_kwh': float(item.get('generation_kwh', 0)),
                    'status': 'History'
                })

        elif data_type == 'daily':
            table_name = "daily_stats"
            for item in data_list:
                records.append({
                    'plant_id': item['plant_id'],
                    'date': item['date'],
                    'total_generation': float(item.get('generation_kwh', 0))
                })

        elif data_type == 'monthly':
            table_name = "monthly_stats"
            for item in data_list:
                records.append({
                    'plant_id': item['plant_id'],
                    'month': item['month'],  # YYYY-MM
                    'total_generation': float(item.get('generation_kwh', 0)),
                    'updated_at': datetime.now().isoformat()
                })

        if not records:
            return False

        if data_type == 'hourly':
            # Plain insert: the original author was unsure whether solar_logs
            # has a unique constraint usable for upsert, so re-running may
            # create duplicates.
            client.table(table_name).insert(records).execute()

        elif data_type == 'daily':
            client.table(table_name).upsert(records, on_conflict="plant_id, date").execute()

            # [Auto update] refresh monthly totals for every touched month.
            updated_months = set()
            for rec in records:
                try:
                    # date: YYYY-MM-DD -> month key YYYY-MM
                    updated_months.add((rec['plant_id'], rec['date'][:7]))
                except (TypeError, KeyError):
                    continue

            monthly_upserts = []
            for pid, m_key in updated_months:
                try:
                    # Real last day of the month: "-31" is not a valid date
                    # for shorter months.
                    last_day = calendar.monthrange(int(m_key[:4]), int(m_key[5:7]))[1]
                except ValueError:
                    print(f" ⚠️ [Sync] Skipping unrecognised month key: {m_key!r}")
                    continue
                # At most 31 rows per month, so summing in Python is cheap.
                d_res = (
                    client.table("daily_stats").select("total_generation")
                    .eq("plant_id", pid)
                    .gte("date", f"{m_key}-01")
                    .lte("date", f"{m_key}-{last_day:02d}")
                    .execute()
                )
                total_gen = sum(r['total_generation'] or 0 for r in d_res.data)
                monthly_upserts.append({
                    "plant_id": pid,
                    "month": m_key,
                    "total_generation": round(total_gen, 2),
                    "updated_at": datetime.now().isoformat()
                })

            if monthly_upserts:
                client.table("monthly_stats").upsert(
                    monthly_upserts, on_conflict="plant_id, month"
                ).execute()
                print(f" 🔄 [Sync] {len(monthly_upserts)}개월치 Monthly Stats 자동 갱신 완료")

        elif data_type == 'monthly':
            client.table(table_name).upsert(records, on_conflict="plant_id, month").execute()

        print(f"✅ [History] {data_type} 데이터 {len(records)}건 저장 완료")
        return True
    except Exception as e:
        print(f"❌ [History] 저장 실패 ({data_type}): {e}")
        return False

138
fetch_history.py Normal file
View File

@ -0,0 +1,138 @@
import sys
import os
import importlib
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
from dotenv import load_dotenv
# .env 로드
load_dotenv()
# Windows 인코딩 문제 해결
if sys.platform.startswith('win'):
sys.stdout.reconfigure(encoding='utf-8')
sys.stderr.reconfigure(encoding='utf-8')
# 프로젝트 루트 경로 추가
current_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(current_dir)
from config import get_all_plants
from database import save_history
def get_plant_config(target_id):
    """Find the plant configuration for ``target_id``.

    A plant whose ``id`` equals ``target_id`` is returned as-is. Otherwise a
    plant flagged ``options.is_split`` serves the NREMS split units
    ``nrems-01`` / ``nrems-02``: a copy of its config is returned with ``id``
    set to the requested unit and ``options.split_index`` set to 1 or 2.

    Args:
        target_id: plant id to look up.

    Returns:
        dict: the matching plant config, or None if nothing matches.
    """
    split_units = {'nrems-01': 1, 'nrems-02': 2}

    for p in get_all_plants():
        # Regular match
        if p.get('id') == target_id:
            return p

        # NREMS split units
        options = p.get('options', {})
        if options.get('is_split') and target_id in split_units:
            # Return a copy so the shared config from get_all_plants() is
            # not mutated.
            return {
                **p,
                'id': target_id,
                'options': {**options, 'split_index': split_units[target_id]},
            }
    return None
def fetch_and_save(plant_config):
    """Fetch a plant's history through its crawler module and store it.

    Imports ``crawlers.<type>`` dynamically and runs two jobs:

    1. Hourly: first day of the current month through today.
    2. Daily: the plant's ``start_date`` (default ``2020-01-01``) through
       today.

    Monthly history is deliberately not collected here; per the original
    note, the API derives monthly figures from ``daily_stats``. A job is
    skipped with a message when the crawler lacks the matching
    ``fetch_history_*`` function, and one failing job does not stop the next.

    Args:
        plant_config: dict with ``id``, ``type``, ``name`` and optionally
            ``start_date`` (``YYYY-MM-DD``).

    Returns:
        None.

    Raises:
        KeyError: if ``id``, ``type`` or ``name`` is missing.
    """
    plant_id = plant_config['id']
    plant_type = plant_config['type']
    plant_name = plant_config['name']
    start_date_str = plant_config.get('start_date', '2020-01-01')

    print(f"🚀 [{plant_name}] 과거 데이터 수집 시작 ({plant_id})")
    print(f" 타입: {plant_type}, 가동개시일: {start_date_str}")

    # Dynamic import of the crawler module
    try:
        crawler_module = importlib.import_module(f"crawlers.{plant_type}")
    except ImportError:
        print(f"❌ 크롤러 모듈을 찾을 수 없습니다: crawlers/{plant_type}.py")
        return

    now = datetime.now()
    today_str = now.strftime("%Y-%m-%d")

    # (log label, data type, Korean label used in the "unsupported" message, range start)
    jobs = (
        ('Hourly', 'hourly', '시간별', now.replace(day=1).strftime("%Y-%m-%d")),
        # The full daily range is needed because the API aggregates daily_stats.
        ('Daily', 'daily', '일별', start_date_str),
    )
    for label, data_type, korean_label, range_start in jobs:
        try:
            print(f"\n⏳ [{label}] 수집 : {range_start} ~ {today_str}")
            fetcher_name = f"fetch_history_{data_type}"
            if not hasattr(crawler_module, fetcher_name):
                print(f" {plant_type}는 {korean_label} 이력 수집을 지원하지 않음")
                continue
            rows = getattr(crawler_module, fetcher_name)(plant_config, range_start, today_str)
            if rows:
                save_history(rows, data_type)
            else:
                print(" 데이터 없음")
        except Exception as e:
            print(f"❌ [{label}] 에러: {e}")

    print(f"\n✅ [{plant_name}] 모든 작업 완료")
if __name__ == "__main__":
    # CLI entry point: python fetch_history.py <plant_id>
    try:
        target_plant_id = sys.argv[1]
    except IndexError:
        print("Usage: python fetch_history.py <plant_id>")
        sys.exit(1)

    cfg = get_plant_config(target_plant_id)
    if not cfg:
        print(f"❌ 설정을 찾을 수 없습니다: {target_plant_id}")
    else:
        fetch_and_save(cfg)

158
main.py Normal file
View File

@ -0,0 +1,158 @@
# ==========================================
# main.py - 태양광 발전 통합 관제 시스템
# ==========================================
import re
from datetime import datetime
# 환경 변수 로드 (최상단에서 실행)
try:
from dotenv import load_dotenv
load_dotenv()
print("✅ 환경 변수 로드 완료")
except ImportError:
print("⚠️ python-dotenv가 설치되지 않았습니다. 환경 변수를 직접 설정하세요.")
from config import get_all_plants
from database import save_to_supabase, save_to_console
from crawlers import get_crawler
from crawler_manager import CrawlerManager
# 스마트 스케줄러 초기화
crawler_manager = CrawlerManager()
def extract_unit_number(name):
    """Return the unit number found in a plant name, for sorting.

    Looks for the first run of digits directly followed by '호기'. Names
    without such a pattern get 999.
    """
    found = re.search(r'(\d+)호기', name)
    return int(found.group(1)) if found else 999
def integrated_monitoring(save_to_db=True, company_filter=None, force_run=False):
    """
    Run one pass of the integrated monitoring.

    Crawls every configured plant (subject to the scheduler), sorts the
    readings by unit number, removes duplicates, prints them, optionally
    stores them, and between 10:00 and 17:59 local time reports plants whose
    ``kw`` is 0.

    Args:
        save_to_db: when True, store the results in Supabase.
        company_filter: only crawl plants whose ``company_id`` equals this
            value (e.g. 'sunwind').
        force_run: when True, ignore the scheduler and crawl everything.

    Returns:
        list: the de-duplicated, sorted result dicts.
    """
    started_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(f"\n🚀 [통합 관제 시스템] 데이터 수집 시작... ({started_at})")
    print("-" * 75)

    # Flattened plant list, optionally narrowed to one company.
    all_plants = get_all_plants()
    if company_filter:
        all_plants = [p for p in all_plants if p['company_id'] == company_filter]
        print(f"📌 필터 적용: {company_filter}")

    def _is_due(site_id):
        """Register the site with the scheduler and ask whether it should run."""
        crawler_manager.register_site(site_id)
        return crawler_manager.should_run(site_id)

    total_results = []
    skipped_count = 0

    for plant in all_plants:
        plant_type = plant['type']
        plant_name = plant.get('display_name', plant.get('name', 'Unknown'))
        company_id = plant.get('company_id', '')
        company_name = plant.get('company_name', '')

        # A split plant produces two site ids (units 1 and 2).
        if plant.get('options', {}).get('is_split', False):
            site_ids = ['nrems-01', 'nrems-02']
        else:
            site_ids = [plant.get('id', '')]

        # Scheduler check: crawl when any site id is due. any() stops at the
        # first due site, so later ids are not registered in that pass.
        if not force_run:
            if not any(_is_due(site_id) for site_id in site_ids if site_id):
                print(f" ⏭️ [{plant_type.upper()}] {plant_name} 스킵 (스케줄 외)")
                skipped_count += 1
                continue

        print(f"📡 [{plant_type.upper()}] {company_name} - {plant_name} 수집 중...")
        try:
            crawler_func = get_crawler(plant_type)
            if not crawler_func:
                print(f" ⚠️ 알 수 없는 크롤러 타입: {plant_type}")
            else:
                data = crawler_func(plant)
                if data:
                    for item in data:
                        # Attach company info and record the successful run.
                        item['company_id'] = company_id
                        item['company_name'] = company_name
                        item_id = item.get('id', '')
                        if item_id:
                            crawler_manager.record_run(item_id)
                    total_results.extend(data)
        except Exception as e:
            print(f"{plant_name} 실패: {e}")

    # Sort by unit number.
    total_results.sort(key=lambda entry: extract_unit_number(entry['name']))

    # Drop duplicates on the company_id + id combination, keeping the first.
    seen_keys = set()
    deduped = []
    for item in total_results:
        unique_key = f"{item.get('company_id', '')}_{item.get('id', '')}"
        if unique_key in seen_keys:
            continue
        seen_keys.add(unique_key)
        deduped.append(item)
    total_results = deduped

    print("-" * 75)
    if skipped_count > 0:
        print(f"📊 스킵된 사이트: {skipped_count}개 (스케줄 외)")

    if not total_results:
        print("❌ 수집된 데이터가 없습니다.")
    else:
        save_to_console(total_results)
        if save_to_db:
            save_to_supabase(total_results)

        # Anomaly detection between 10:00 and 17:59.
        if datetime.now().hour in range(10, 18):
            issues = [d['name'] for d in total_results if d.get('kw', 0) == 0]
            if issues:
                print("\n🚨 [이상 감지 리포트]")
                for name in issues:
                    print(f" ⚠️ 경고: '{name}' 발전량이 0입니다! 확인 필요.")
            else:
                print("\n ✅ 현재 모든 발전소가 정상 가동 중입니다.")

    return total_results
if __name__ == "__main__":
    import sys

    # --force / -f: ignore the scheduler and crawl every site
    force_run = any(flag in sys.argv for flag in ('--force', '-f'))
    if force_run:
        print("⚡ [강제 실행 모드] 스케줄러 무시하고 모든 사이트 크롤링")
    integrated_monitoring(save_to_db=True, force_run=force_run)

91
sync_plants.py Normal file
View File

@ -0,0 +1,91 @@
# ==========================================
# sync_plants.py - 발전소 정보 동기화
# ==========================================
# config.py의 발전소 정보를 Supabase plants 테이블에 Upsert
from datetime import datetime
try:
from dotenv import load_dotenv
load_dotenv()
except ImportError:
pass
from config import get_all_plants
from database import get_supabase_client
def sync_plants():
    """
    Upsert the plants defined in the local config into the ``plants`` table.

    A plant flagged ``options.is_split`` becomes two rows, ``nrems-01`` and
    ``nrems-02``, each with half of its ``capacity_kw`` (default 100.0).
    Other plants with an id become one row each. Rows are keyed by id, so a
    later entry with the same id replaces an earlier one. ``company_id`` is
    always written as 1.

    Returns:
        bool: False when no Supabase client is available, otherwise True,
        even if individual upserts failed (those are only printed).
    """
    print(f"\n🔄 [발전소 동기화] 시작... ({datetime.now().strftime('%Y-%m-%d %H:%M:%S')})")
    print("-" * 60)

    client = get_supabase_client()
    if not client:
        print("❌ Supabase 연결 실패")
        return False

    def _row(row_id, label, source, capacity, built_on):
        """Build one ``plants`` row; company_id is fixed to 1."""
        return {
            'id': row_id,
            'name': f"{source.get('company_name', '')} {label}",
            'type': source.get('type', ''),
            'capacity': capacity,
            'constructed_at': built_on,
            'company_id': 1
        }

    # Collect rows keyed by id (split units 1 and 2 are handled separately).
    unique_plants = {}
    for plant in get_all_plants():
        plant_id = plant.get('id', '')
        if plant.get('options', {}).get('is_split', False):
            # Units 1 and 2 share the total capacity equally.
            unit_capacity = plant.get('capacity_kw', 100.0) / 2
            start_date = plant.get('start_date', '')
            for unit in (1, 2):
                unit_id = f"nrems-0{unit}"
                unique_plants[unit_id] = _row(
                    unit_id, f"{unit}호기", plant, unit_capacity, start_date
                )
        elif plant_id:
            unique_plants[plant_id] = _row(
                plant_id,
                plant.get('name', ''),
                plant,
                plant.get('capacity_kw', 0.0),
                plant.get('start_date', ''),
            )

    success_count = 0
    for plant_id, plant_data in unique_plants.items():
        try:
            client.table("plants").upsert(plant_data, on_conflict="id").execute()
            print(f"{plant_data['name']} (용량: {plant_data['capacity']} kW)")
            success_count += 1
        except Exception as e:
            print(f"{plant_id} 실패: {e}")

    print("-" * 60)
    print(f"✅ 동기화 완료: {success_count}/{len(unique_plants)}")
    return True


if __name__ == "__main__":
    sync_plants()

View File

@ -0,0 +1,128 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Script that checks the data for January 28 and 29.
"""
from datetime import datetime
import sys
import os
print("Starting checks...", flush=True)

# Put the directory above this script on sys.path so that the project
# modules (crawlers, config) can be imported when the script is run directly.
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(current_dir)
sys.path.append(parent_dir)

print(f"Current dir: {current_dir}", flush=True)
print(f"Parent dir: {parent_dir}", flush=True)
print(f"Sys path: {sys.path}", flush=True)

try:
    from crawlers import nrems, hyundai, kremc, sun_wms, cmsolar
    from config import SYSTEM_CONSTANTS
except Exception as exc:
    # Nothing below can work without these modules: show why and abort.
    print(f"Import failed: {exc}", flush=True)
    import traceback
    traceback.print_exc()
    sys.exit(1)
else:
    print("Imports successful", flush=True)
def check_dates(plant_config, crawler_module, start_date, end_date):
    """
    Fetch one plant's daily generation for a date range and print it.

    Args:
        plant_config: Plant settings dict. ``plant_config['name']`` is used
            as the label and the dict is handed to the crawler unchanged.
        crawler_module: Object exposing
            ``fetch_history_daily(plant_config, start_date, end_date)``.
        start_date: First date of the range, passed through to the crawler.
        end_date: Last date of the range, passed through to the crawler.

    Any exception raised while fetching or printing is reported on stdout
    instead of being propagated.
    """
    label = plant_config['name']
    print(f"\n[{label}] 데이터 확인: {start_date} ~ {end_date}")

    try:
        records = crawler_module.fetch_history_daily(plant_config, start_date, end_date)
        if records:
            print(f"{len(records)}일 데이터 수신")
            for entry in records:
                day = entry.get('date', 'Unknown')
                kwh = entry.get('generation_kwh', 0)
                print(f" - 날짜: {day}, 발전량: {kwh} kWh")
        else:
            print(" ❌ 데이터 없음")
    except Exception as exc:
        print(f" ❌ 오류 발생: {str(exc)}")
def main():
    """
    Run ``check_dates`` for every test plant over 2026-01-28 .. 2026-01-29.

    The plant settings are declared inline in this function rather than
    read from config.py.
    """
    print(">>> 1월 28, 29일 데이터 확인 <<<")

    # Date range to check
    start_date = '2026-01-28'
    end_date = '2026-01-29'

    def target(plant_id, name, kind, auth, crawler, options=None):
        # Build one (plant_config, crawler_module) pair; `kind` selects both
        # the 'type' value and the SYSTEM_CONSTANTS entry.
        config = {
            'id': plant_id,
            'name': name,
            'type': kind,
            'auth': auth,
            'options': {} if options is None else options,
            'system': SYSTEM_CONSTANTS[kind],
        }
        return config, crawler

    # NOTE(review): credentials are hard-coded below; consider loading them
    # from config.py or the environment instead.
    test_plants = [
        # NREMS units 1 and 2 (split plant)
        target('nrems-01', '1호기', 'nrems', {'pscode': 'duce2023072288'}, nrems,
               options={'is_split': True, 'unit_id': 1}),
        target('nrems-02', '2호기', 'nrems', {'pscode': 'duce2023072288'}, nrems,
               options={'is_split': True, 'unit_id': 2}),
        # NREMS unit 3
        target('nrems-03', '3호기', 'nrems', {'pscode': 'dc2023121086'}, nrems),
        # NREMS unit 4
        target('nrems-04', '4호기', 'nrems', {'pscode': 'duce2023072269'}, nrems),
        # NREMS unit 9
        target('nrems-09', '9호기', 'nrems', {'pscode': 'a2020061008'}, nrems),
        # KREMC unit 5
        target('kremc-05', '5호기', 'kremc',
               {'user_id': '서대문도서관', 'password': 'sunhope5!'}, kremc,
               options={'cid': '10013000376', 'cityProvCode': '11', 'rgnCode': '11410',
                        'dongCode': '1141011700', 'enso_type_code': '15001'}),
        # Sun-WMS unit 6
        target('sunwms-06', '6호기', 'sun_wms',
               {'payload_id': 'kc0fXUW0LUm2wZa+2NQI0Q==', 'payload_pw': 'PGXjU6ib2mKYwtrh2i3fIQ=='},
               sun_wms),
        # Hyundai unit 8
        target('hyundai-08', '8호기', 'hyundai',
               {'user_id': 'epecoop', 'password': 'sunhope0419', 'site_id': 'M0494'}, hyundai),
        # CMSolar unit 10
        # NOTE(review): verify_data.py declares a different login for this
        # plant ('smart3131' with a 'password' key); the values used here are
        # the ones this script attributes to config.py - confirm which set
        # the cmsolar crawler actually expects.
        target('cmsolar-10', '10호기', 'cmsolar',
               {'login_id': 'sy7144', 'login_pw': 'sy7144', 'site_no': '834'}, cmsolar),
    ]

    for plant_config, crawler_module in test_plants:
        check_dates(plant_config, crawler_module, start_date, end_date)
if __name__ == '__main__':
main()

26
tests/check_today_10.py Normal file
View File

@ -0,0 +1,26 @@
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from dotenv import load_dotenv
load_dotenv()
from database import get_supabase_client
from datetime import datetime, timezone, timedelta
def check_today():
    """
    Print the ``solar_logs`` rows of plant ``cmsolar-10`` created since
    00:00 today (KST), newest first.

    Prints an error and returns early when no Supabase client is available.
    """
    client = get_supabase_client()
    if not client:
        # The factory can return a falsy value when the connection fails.
        print("❌ Supabase 연결 실패")
        return

    # Today in KST (UTC+9)
    kst = timezone(timedelta(hours=9))
    today_start = datetime.now(kst).replace(hour=0, minute=0, second=0, microsecond=0)
    print(f"Checking data since {today_start.isoformat()} (KST)")

    res = (
        client.table('solar_logs')
        .select('created_at, current_kw, today_kwh, status')
        .eq('plant_id', 'cmsolar-10')
        .gte('created_at', today_start.isoformat())
        .order('created_at', desc=True)
        .execute()
    )

    rows = res.data or []
    print(f"Found {len(rows)} records for today:")
    for item in rows:
        print(f"{item['created_at']} | {item.get('current_kw')} kW")
if __name__ == "__main__":
check_today()

51
tests/debug_cmsolar.py Normal file
View File

@ -0,0 +1,51 @@
import requests
from config import get_all_plants
from crawlers.cmsolar import fetch_data
from crawlers.base import create_session
def debug_cmsolar():
    """
    Run the CMSolar crawler's ``fetch_data`` for plant ``cmsolar-10`` and
    print the result, or the exception it raised.
    """
    plants = get_all_plants()
    # .get() so that a plant entry without an 'id' key cannot abort the search.
    target = next((p for p in plants if p.get('id') == 'cmsolar-10'), None)
    if not target:
        print("Plant 10 not found")
        return

    print(f"Debug target: {target['name']}")

    # NOTE(review): earlier notes here suspected that crawlers/cmsolar.py
    # reads the credentials as 'payload_id'/'payload_pw' while config.py
    # provides 'login_id'/'login_pw', which would leave the login empty.
    # The key listing below exists to check that - confirm against the crawler.
    auth = target.get('auth', {})
    print(f"Auth keys in config: {list(auth.keys())}")

    # Run the real crawler and surface whatever it raises.
    try:
        result = fetch_data(target)
        print(f"Result: {result}")
    except Exception as e:
        print(f"Exception: {e}")
if __name__ == "__main__":
debug_cmsolar()

View File

@ -0,0 +1,85 @@
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from dotenv import load_dotenv
load_dotenv()
from config import get_all_plants
from crawlers.base import create_session
def debug_cmsolar_realtime():
    """
    Reproduce the CMSolar real-time request sequence for plant
    ``cmsolar-10`` by hand and print every intermediate result.

    Steps: POST the login form, select the site via ``change.php``, then GET
    ``plant/sub/idx_ok.php?mode=getPlant`` and try to parse the body as JSON.
    """
    plants = get_all_plants()
    # .get() so that a plant entry without an 'id' key cannot abort the search.
    target = next((p for p in plants if p.get('id') == 'cmsolar-10'), None)
    if not target:
        print("Plant 10 not found")
        return

    print(f"Debug target: {target['name']}")

    # Extract info
    auth = target.get('auth', {})
    system = target.get('system', {})

    login_id = auth.get('login_id', '')
    login_pw = auth.get('login_pw', '')
    site_no = auth.get('site_no', '')

    login_url = system.get('login_url', '')
    # Printed for reference only; the request below uses the real-time endpoint.
    data_url = system.get('data_url', '')
    # Prefer the configured base URL; fall back to the previously hard-coded one.
    base_url = (system.get('base_url') or "http://www.cmsolar2.kr").rstrip('/')

    print(f"Login ID: {login_id}")
    print(f"Login URL: {login_url}")
    print(f"Data URL: {data_url}")

    session = create_session()
    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded'
    }

    # Login
    login_data = {
        'login_id': login_id,
        'login_pw': login_pw,
        'site_no': site_no
    }

    print("Logging in...")
    try:
        res = session.post(login_url, data=login_data, headers=headers)
        print(f"Login Status: {res.status_code}")

        # Site selection
        change_url = f"{base_url}/change.php?site={site_no}"
        print(f"Selecting site via {change_url}...")
        session.get(change_url, headers=headers)
    except Exception as e:
        print(f"Login/Select Error: {e}")
        return

    # Fetch Data
    real_data_url = f"{base_url}/plant/sub/idx_ok.php?mode=getPlant"
    print(f"Fetching data from {real_data_url}...")

    try:
        res = session.get(real_data_url, headers=headers)
        print(f"Data Status: {res.status_code}")
        print(f"Data Response:\n{res.text}")

        # Only checks that the body is valid JSON; the parsed value is not used.
        try:
            res.json()
            print("JSON parsed successfully.")
        except Exception as e:
            print(f"JSON Parse Error: {e}")
    except Exception as e:
        print(f"Data Fetch Error: {e}")
if __name__ == "__main__":
debug_cmsolar_realtime()

14
tests/debug_db_check.py Normal file
View File

@ -0,0 +1,14 @@
from dotenv import load_dotenv
load_dotenv()
from database import get_supabase_client
def check_db():
    """
    Print the 30 most recent ``solar_logs`` rows of plant ``cmsolar-10``.

    Prints an error and returns early when no Supabase client is available.
    """
    client = get_supabase_client()
    if not client:
        # The factory can return a falsy value when the connection fails.
        print("❌ Supabase 연결 실패")
        return

    res = (
        client.table('solar_logs')
        .select('created_at, current_kw, today_kwh')
        .eq('plant_id', 'cmsolar-10')
        .order('created_at', desc=True)
        .limit(30)
        .execute()
    )

    print("Recent logs for cmsolar-10:")
    for item in res.data or []:
        print(f"{item['created_at']} | {item.get('current_kw', 'N/A')} kW | {item.get('today_kwh', 'N/A')} kWh")
if __name__ == "__main__":
check_db()

43
tests/debug_kremc.py Normal file
View File

@ -0,0 +1,43 @@
import requests
from dotenv import load_dotenv
load_dotenv()
from config import get_all_plants
from crawlers.kremc import fetch_data
from crawlers.base import create_session
def debug_kremc():
    """
    Fetch today's hourly history for plant ``kremc-05``, print it and, when
    any rows came back, store them through ``save_history(..., 'hourly')``.
    """
    plants = get_all_plants()
    # Find unit 5 (kremc) by id; .get() tolerates entries without an 'id' key.
    target = next((p for p in plants if p.get('id') == 'kremc-05'), None)
    if not target:
        print("Plant kremc-05 not found")
        return

    print(f"Debug target: {target['name']}")

    from datetime import datetime
    from crawlers.kremc import fetch_history_hourly
    from database import save_history

    # NOTE(review): uses the host's local date; on a host that is not in KST
    # this can differ from the plant's calendar day - confirm.
    today = datetime.now().strftime('%Y-%m-%d')
    print(f"Fetching hourly history for {today}...")

    try:
        results = fetch_history_hourly(target, today, today)
        print(f"Hourly Results ({len(results)}):")
        for r in results:
            print(f" {r['timestamp']}: {r['generation_kwh']} kWh")

        if results:
            print("Saving to DB...")
            save_history(results, 'hourly')
            print("Done.")
    except Exception as e:
        print(f"Exception: {e}")
if __name__ == "__main__":
debug_kremc()

View File

@ -0,0 +1,30 @@
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from dotenv import load_dotenv
load_dotenv()
from config import get_all_plants
from crawlers.kremc import fetch_data
def debug_kremc_realtime():
    """
    Run the KREMC crawler's ``fetch_data`` for plant ``kremc-05`` and print
    the result, or the error it raised.
    """
    plants = get_all_plants()
    # .get() so that a plant entry without an 'id' key cannot abort the search.
    target = next((p for p in plants if p.get('id') == 'kremc-05'), None)
    if not target:
        print("Plant 5 not found")
        return

    print(f"Debug target: {target['name']}")

    try:
        print("Fetching data...")
        results = fetch_data(target)
        print(f"Results: {results}")
    except Exception as e:
        print(f"Error: {e}")
if __name__ == "__main__":
debug_kremc_realtime()

53
tests/fill_today_data.py Normal file
View File

@ -0,0 +1,53 @@
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from dotenv import load_dotenv
load_dotenv()
from datetime import datetime
from database import get_supabase_client, save_history
from config import get_all_plants
from crawlers.kremc import fetch_history_hourly as fetch_kremc
from crawlers.cmsolar import fetch_history_hourly as fetch_cmsolar
def cleanup_history(plant_id, today_str):
    """
    Delete the ``solar_logs`` rows of one plant whose status is 'History'.

    Args:
        plant_id: Value matched against the ``plant_id`` column.
        today_str: Date string supplied by the caller; currently not used.

    Failures are reported on stdout and never raised.
    """
    client = get_supabase_client()

    # NOTE(review): the delete is not restricted to `today_str`; every row
    # of this plant with status 'History' is removed. The original rationale
    # was that created_at is stored in UTC, which makes a KST date filter
    # awkward, and that only this manual script writes status 'History'.
    # Confirm that wiping all of them is intended.
    try:
        history_rows = (
            client.table('solar_logs')
            .delete()
            .eq('plant_id', plant_id)
            .eq('status', 'History')
        )
        response = history_rows.execute()
        print(f"[{plant_id}] Cleaned up {len(response.data)} old history records.")
    except Exception as exc:
        print(f"[{plant_id}] Cleanup failed (might be empty): {exc}")
def fill_today_data(today="2026-01-29"):
    """
    Re-fetch the hourly history of CMSolar plant ``cmsolar-10`` for one day
    and store it through ``save_history(..., 'hourly')``.

    Existing 'History' rows of the plant are removed first via
    ``cleanup_history``.

    Args:
        today: Day to fill, as 'YYYY-MM-DD'. Defaults to the date this
            script was originally written for.
    """
    plants = get_all_plants()
    # .get() so that a plant entry without an 'id' key cannot abort the search.
    cmsolar_plant = next((p for p in plants if p.get('id') == 'cmsolar-10'), None)

    print(f"Filling data for {today}...")

    # 1. KREMC (unit 5) - skipped, already filled.

    # 2. CMSolar (unit 10)
    if cmsolar_plant:
        print("\n--- Processing CMSolar (10호기) ---")
        cleanup_history('cmsolar-10', today)
        try:
            results = fetch_cmsolar(cmsolar_plant, today, today)
            print(f"Fetched results: {results}")
            if results:
                save_history(results, 'hourly')
                print("Saved CMSolar data.")
        except Exception as e:
            print(f"CMSolar Error: {e}")
if __name__ == "__main__":
fill_today_data()

249
verify_data.py Normal file
View File

@ -0,0 +1,249 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
데이터 검증 스크립트
Queries each plant's actual data for specific dates, months and years in order to verify it.
"""
from datetime import datetime
from crawlers import nrems, hyundai, kremc, sun_wms, cmsolar
from config import SYSTEM_CONSTANTS
def format_hourly_data(data, date_str, plant_name):
    """
    Print a per-hour generation table for one plant and one day.

    Records whose ``timestamp`` does not start with ``date_str`` are
    ignored. The rest are summed per hour and printed in ascending hour
    order, followed by the total.

    Args:
        data: Sequence of dicts with ``timestamp`` and ``generation_kwh``
            entries; may be empty or None.
        date_str: Date prefix ('YYYY-MM-DD') selecting the records to show.
        plant_name: Label printed in the table header.
    """
    print(f"\n{'='*80}")
    print(f"[{plant_name}] 시간별 데이터: {date_str}")
    print(f"{'='*80}")

    if not data:
        print(" ❌ 데이터 없음")
        return

    # Group by hour
    hourly_dict = {}
    for record in data:
        # `or` also covers keys that are present but hold None.
        timestamp = record.get('timestamp') or ''
        if not timestamp.startswith(date_str):
            continue
        # Accept "YYYY-MM-DD HH:MM" as well as ISO "YYYY-MM-DDTHH:MM";
        # without a time part the record is counted under hour '00'.
        normalized = timestamp.replace('T', ' ', 1)
        hour = normalized.split(' ')[1][:2] if ' ' in normalized else '00'
        kwh = record.get('generation_kwh') or 0
        hourly_dict[hour] = hourly_dict.get(hour, 0) + kwh

    if not hourly_dict:
        print(" ❌ 해당 날짜 데이터 없음")
        return

    print(f"{len(hourly_dict)}시간 데이터")
    print(f"\n {'시간':<8} {'발전량(kWh)':<15}")
    print(f" {'-'*25}")

    total = 0
    for hour in sorted(hourly_dict):
        kwh = hourly_dict[hour]
        total += kwh
        print(f" {hour}:00 {kwh:>10.2f}")

    print(f" {'-'*25}")
    print(f" {'합계':<8} {total:>10.2f}")
def format_daily_data(data, year_month, plant_name):
    """
    Print a per-day generation table for one plant and one month.

    Only records whose ``date`` starts with ``year_month`` are shown, in
    ascending date order, followed by the total.

    Args:
        data: Sequence of dicts with ``date`` and ``generation_kwh``
            entries; may be empty or None.
        year_month: Prefix ('YYYY-MM') selecting the records to show.
        plant_name: Label printed in the table header.
    """
    print(f"\n{'='*80}")
    print(f"[{plant_name}] 일별 데이터: {year_month}")
    print(f"{'='*80}")

    if not data:
        print(" ❌ 데이터 없음")
        return

    def _date_of(record):
        # `or` also covers a 'date' key that is present but holds None.
        return record.get('date') or ''

    # Keep only the records of the requested month
    monthly_data = [d for d in data if _date_of(d).startswith(year_month)]

    if not monthly_data:
        print(" ❌ 해당 월 데이터 없음")
        return

    print(f"{len(monthly_data)}일 데이터")
    print(f"\n {'날짜':<15} {'발전량(kWh)':<15}")
    print(f" {'-'*30}")

    total = 0
    for record in sorted(monthly_data, key=_date_of):
        date = _date_of(record)
        # A None generation value is counted as 0 instead of crashing.
        kwh = record.get('generation_kwh') or 0
        total += kwh
        print(f" {date:<15} {kwh:>10.2f}")

    print(f" {'-'*30}")
    print(f" {'합계':<15} {total:>10.2f}")
def format_monthly_data(data, year, plant_name):
    """
    Print a per-month generation table for one plant and one year.

    Only records whose ``month`` starts with ``year`` are shown, in
    ascending month order, followed by the total and the monthly average.

    Args:
        data: Sequence of dicts with ``month`` and ``generation_kwh``
            entries; may be empty or None.
        year: Prefix ('YYYY') selecting the records to show.
        plant_name: Label printed in the table header.
    """
    print(f"\n{'='*80}")
    print(f"[{plant_name}] 월별 데이터: {year}")
    print(f"{'='*80}")

    if not data:
        print(" ❌ 데이터 없음")
        return

    def _month_of(record):
        # `or` also covers a 'month' key that is present but holds None.
        return record.get('month') or ''

    # Keep only the records of the requested year
    yearly_data = [d for d in data if _month_of(d).startswith(year)]

    if not yearly_data:
        print(" ❌ 해당 연도 데이터 없음")
        return

    print(f"{len(yearly_data)}개월 데이터")
    # The month column gets a header label like the hourly/daily tables.
    print(f"\n {'월':<10} {'발전량(kWh)':<15}")
    print(f" {'-'*25}")

    total = 0
    for record in sorted(yearly_data, key=_month_of):
        month = _month_of(record)
        # A None generation value is counted as 0 instead of crashing.
        kwh = record.get('generation_kwh') or 0
        total += kwh
        print(f" {month:<10} {kwh:>10.2f}")

    print(f" {'-'*25}")
    print(f" {'합계':<10} {total:>10.2f}")
    # yearly_data is known to be non-empty here, so the average is defined.
    print(f" {'평균':<10} {total/len(yearly_data):>10.2f}")
def verify_plant(plant_config, crawler_module):
    """
    Fetch and print the hourly, daily and monthly verification samples of
    one plant.

    Args:
        plant_config: Plant settings dict; ``plant_config['name']`` is the
            label and the dict is handed to the crawler unchanged.
        crawler_module: Object exposing ``fetch_history_hourly``,
            ``fetch_history_daily`` and ``fetch_history_monthly``, each
            called as ``(plant_config, start, end)``.

    The first exception raised by any step is printed with its traceback
    and ends the verification of this plant.
    """
    plant_name = plant_config['name']
    print(f"\n{'#'*80}")
    print(f"# {plant_name}")
    print(f"{'#'*80}")

    try:
        # (banner, crawler function name, printer, ((start, end, label), ...))
        # The crawler function is looked up by name only when its step runs.
        plan = (
            # 1. Hourly data: 2025-05-10, 2024-10-20
            ("\n[1/6] 시간별 데이터 수집 중...", 'fetch_history_hourly', format_hourly_data,
             (('2025-05-10', '2025-05-10', '2025-05-10'),
              ('2024-10-20', '2024-10-20', '2024-10-20'))),
            # 2. Daily data: 2025-05, 2024-07
            ("\n[2/6] 일별 데이터 수집 중...", 'fetch_history_daily', format_daily_data,
             (('2025-05-01', '2025-05-31', '2025-05'),
              ('2024-07-01', '2024-07-31', '2024-07'))),
            # 3. Monthly data: 2025, 2024
            ("\n[3/6] 월별 데이터 수집 중...", 'fetch_history_monthly', format_monthly_data,
             (('2025-01', '2025-12', '2025'),
              ('2024-01', '2024-12', '2024'))),
        )

        for banner, fetch_name, render, queries in plan:
            print(banner)
            for start, end, label in queries:
                rows = getattr(crawler_module, fetch_name)(plant_config, start, end)
                render(rows, label, plant_name)

        print(f"\n>>> {plant_name} 검증 완료")

    except Exception as exc:
        print(f"\n ❌ 오류 발생: {str(exc)}")
        import traceback
        traceback.print_exc()
def main():
    """
    Entry point: run ``verify_plant`` for every test plant.

    A KeyboardInterrupt stops the run; any other exception escaping
    ``verify_plant`` is printed with its traceback and the next plant is
    processed.
    """
    banner = "=" * 80
    print("\n" + banner)
    print(">>> 발전소 데이터 검증 스크립트 <<<")
    print(banner)
    print(f"검증 일시: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("\n[검증 대상]")
    print(" - 시간별: 2025-05-10, 2024-10-20")
    print(" - 일별: 2025년 5월, 2024년 7월")
    print(" - 월별: 2025년, 2024년")

    def target(plant_id, name, kind, auth, crawler, options=None):
        # Build one (plant_config, crawler_module) pair; `kind` selects both
        # the 'type' value and the SYSTEM_CONSTANTS entry.
        config = {
            'id': plant_id,
            'name': name,
            'type': kind,
            'auth': auth,
            'options': {} if options is None else options,
            'system': SYSTEM_CONSTANTS[kind],
        }
        return config, crawler

    # Plants under test.
    # NOTE(review): credentials are hard-coded below; consider loading them
    # from config.py or the environment instead.
    test_plants = [
        # NREMS units 1 and 2 (split plant)
        target('nrems-01', '1호기', 'nrems', {'pscode': 'duce2023072288'}, nrems,
               options={'is_split': True, 'unit_id': 1}),
        target('nrems-02', '2호기', 'nrems', {'pscode': 'duce2023072288'}, nrems,
               options={'is_split': True, 'unit_id': 2}),
        # NREMS unit 3
        target('nrems-03', '3호기', 'nrems', {'pscode': 'dc2023121086'}, nrems),
        # NREMS unit 4
        target('nrems-04', '4호기', 'nrems', {'pscode': 'duce2023072269'}, nrems),
        # NREMS unit 9
        target('nrems-09', '9호기', 'nrems', {'pscode': 'a2020061008'}, nrems),
        # KREMC unit 5
        target('kremc-05', '5호기', 'kremc',
               {'user_id': '서대문도서관', 'password': 'sunhope5!'}, kremc,
               options={'cid': '10013000376', 'cityProvCode': '11', 'rgnCode': '11410',
                        'dongCode': '1141011700', 'enso_type_code': '15001'}),
        # Sun-WMS unit 6
        target('sunwms-06', '6호기', 'sun_wms',
               {'payload_id': 'kc0fXUW0LUm2wZa+2NQI0Q==', 'payload_pw': 'PGXjU6ib2mKYwtrh2i3fIQ=='},
               sun_wms),
        # Hyundai unit 8
        target('hyundai-08', '8호기', 'hyundai',
               {'user_id': 'epecoop', 'password': 'sunhope0419', 'site_id': 'M0494'}, hyundai),
        # CMSolar unit 10
        target('cmsolar-10', '10호기', 'cmsolar',
               {'login_id': 'smart3131', 'password': 'ehdrb!123', 'site_no': '834'}, cmsolar),
    ]

    # Verify each plant in turn
    for plant_config, crawler_module in test_plants:
        try:
            verify_plant(plant_config, crawler_module)
        except KeyboardInterrupt:
            print("\n\n⚠️ 사용자 중단")
            break
        except Exception as exc:
            print(f"\n{plant_config['name']} 검증 실패: {str(exc)}")
            import traceback
            traceback.print_exc()
            continue

    print("\n" + banner)
    print(">>> 데이터 검증 완료 <<<")
    print(banner)
if __name__ == '__main__':
main()