# ==========================================
# crawlers/sun_wms.py - Sun-WMS crawler (unit 6)
# Parses HTML table responses
# ==========================================
import calendar
import re
import time
from datetime import datetime, timedelta

import requests

from .base import create_session, safe_float

# Seconds to wait on login / live-data requests. Without an explicit timeout
# `requests` waits forever, so one stalled server would hang the whole crawl.
_REQUEST_TIMEOUT = 10

# Headers shared by every history request.
_HISTORY_HEADERS = {
    'User-Agent': 'Mozilla/5.0',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
}

# Live-page patterns: the value attribute of the element carrying a given id.
_CUR_POWER_RE = re.compile(r"id=['\"]cur_power['\"].*?value=['\"]([^'\"]+)['\"]")
_TODAY_POWER_RE = re.compile(r"id=['\"]today_power['\"].*?value=['\"]([^'\"]+)['\"]")

# Statistics-page patterns. Rows are matched on the tag-stripped text of the
# table body, so the exact cell markup does not matter.
# NOTE(review): the page markup is not visible from this file; confirm these
# patterns against a real statics.php response.
_TBODY_RE = re.compile(r'<tbody[^>]*>(.*?)</tbody>', re.DOTALL | re.IGNORECASE)
_TAG_RE = re.compile(r'<[^>]+>')
_HOURLY_ROW_RE = re.compile(r'(\d{2}):00\s+([\d.]+)')
_DAILY_ROW_RE = re.compile(r'(\d{4}-\d{2}-\d{2})\s+([\d.]+)')


def _login_payload(plant_info):
    """Build the login form fields from ``plant_info['auth']``."""
    auth = plant_info.get('auth', {})
    return {
        'act': 'loginChk',
        'user_id': auth.get('payload_id', ''),
        'user_pass': auth.get('payload_pw', '')
    }


def _statics_url(plant_info):
    """Return the statistics endpoint: explicit ``statics_url`` or one derived from ``base_url``."""
    system = plant_info.get('system', {})
    base_url = system.get('base_url', '')
    return system.get('statics_url', f"{base_url}/public/statics/statics.php")


def _history_login(session, plant_info):
    """Log in for a history crawl, print the outcome, and return True on HTTP 200."""
    login_url = plant_info.get('system', {}).get('login_url', '')
    try:
        res = session.post(
            login_url,
            data=_login_payload(plant_info),
            headers=_HISTORY_HEADERS,
            timeout=_REQUEST_TIMEOUT,
        )
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return False
    if res.status_code == 200:
        print(" ✓ Login successful")
        return True
    print(" ✗ Login failed")
    return False


def _extract_rows(html, row_re):
    """Return the (label, value) pairs found in the first <tbody>, or None if there is no <tbody>."""
    tbody = _TBODY_RE.search(html)
    if tbody is None:
        return None
    # Replace tags with spaces so adjacent cells stay separated in the text.
    cell_text = _TAG_RE.sub(' ', tbody.group(1))
    return row_re.findall(cell_text)


def _first_float(pattern, text):
    """Return the first captured group of ``pattern`` in ``text`` as a float, or 0.0 if absent."""
    match = pattern.search(text)
    return float(match.group(1)) if match else 0.0


def _first_of_next_month(month_start):
    """Return the first day of the month following ``month_start`` (which must be a day-1 datetime)."""
    if month_start.month == 12:
        return month_start.replace(year=month_start.year + 1, month=1)
    return month_start.replace(month=month_start.month + 1)


def fetch_data(plant_info):
    """
    Collect the current reading of a Sun-WMS plant.

    Logs in with the credentials in ``plant_info['auth']``, fetches
    ``plant_info['system']['data_url']`` and reads the ``cur_power`` and
    ``today_power`` element values from the returned page.

    Returns a one-element list with keys ``id``, ``name``, ``kw``, ``today``
    and ``status``; returns ``[]`` when login does not answer HTTP 200 or any
    request/parsing step raises.
    """
    plant_id = plant_info.get('id', 'sunwms-06')
    system = plant_info.get('system', {})
    company_name = plant_info.get('company_name', '태양과바람')
    plant_name = plant_info.get('name', '6호기')

    login_url = system.get('login_url', '')
    data_url = system.get('data_url', '')

    session = create_session()

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
        'Referer': 'http://tb6.sun-wms.com/public/main/login.php',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }

    # 1. Log in
    try:
        res = session.post(
            login_url,
            data=_login_payload(plant_info),
            headers=headers,
            timeout=_REQUEST_TIMEOUT,
        )
        if res.status_code != 200:
            return []
    except Exception as e:
        print(f"❌ {plant_name} 접속 에러: {e}")
        return []

    # 2. Request the live data page
    try:
        # Millisecond timestamp appended as a query parameter on every call.
        timestamp = int(time.time() * 1000)
        res = session.get(
            f"{data_url}?time={timestamp}",
            headers=headers,
            timeout=_REQUEST_TIMEOUT,
        )
        res.encoding = 'euc-kr'
        content = res.text

        curr_kw = _first_float(_CUR_POWER_RE, content)
        today_kwh = _first_float(_TODAY_POWER_RE, content)

        status = "🟢 정상" if curr_kw > 0 else "💤 대기"

        return [{
            'id': plant_id,
            'name': f'{company_name} {plant_name}',
            'kw': curr_kw,
            'today': today_kwh,
            'status': status
        }]
    except Exception as e:
        print(f"❌ {plant_name} 에러: {e}")
        return []


def fetch_history_hourly(plant_info, start_date, end_date):
    """
    Collect hourly history for a Sun-WMS plant, one request per day.

    Endpoint: /public/statics/statics.php (HTML table response)
    Parameters: tab01=0&tab02=1&tab03=2&tord=1&s_day=YYYY-MM-DD

    ``start_date`` and ``end_date`` are inclusive 'YYYY-MM-DD' strings; a
    malformed date raises ``ValueError``. Returns a list of dicts with keys
    ``plant_id``, ``timestamp``, ``generation_kwh`` and ``current_kw``
    (always 0). Days that fail are reported and skipped.
    """
    results = []

    plant_id = plant_info.get('id', 'sunwms-06')
    plant_name = plant_info.get('name', '6호기')
    statics_url = _statics_url(plant_info)

    session = create_session()

    print(f"\n{'='*60}")
    print(f"[Sun-WMS History] {plant_name} ({start_date} ~ {end_date})")
    print(f"{'='*60}")

    if not _history_login(session, plant_info):
        return results

    # Walk the date range one day at a time
    current_date = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')

    while current_date <= end_dt:
        date_str = current_date.strftime('%Y-%m-%d')

        # Query parameters of the hourly statistics view
        params = {
            'tab01': '0',
            'tab02': '1',
            'tab03': '2',
            'tord': '1',
            's_day': date_str
        }

        try:
            res = session.get(statics_url, params=params, headers=_HISTORY_HEADERS, timeout=10)
            res.encoding = 'euc-kr'

            if res.status_code != 200:
                print(f" ✗ HTTP {res.status_code}")
            else:
                rows = _extract_rows(res.text, _HOURLY_ROW_RE)
                if rows is None:
                    print(f" ⚠ No tbody found for {date_str}")
                elif not rows:
                    print(f" ⚠ No data for {date_str}")
                else:
                    print(f" ✓ Found {len(rows)} hourly records")
                    results.extend(
                        {
                            'plant_id': plant_id,
                            'timestamp': f"{date_str} {hour}:00:00",
                            'generation_kwh': safe_float(kwh),
                            'current_kw': 0
                        }
                        for hour, kwh in rows
                    )
        except Exception as e:
            print(f" ✗ Error: {e}")

        current_date += timedelta(days=1)

    print(f"\n{'='*60}")
    print(f"[Total] Collected {len(results)} hourly records")
    print(f"{'='*60}\n")

    return results


def fetch_history_daily(plant_info, start_date, end_date):
    """
    Collect daily history for a Sun-WMS plant, split into per-month requests.

    Endpoint: /public/statics/statics.php (HTML table response)
    Parameters: tab01=0&tab02=2&tab03=2&tord=2&s_day=YYYY-MM-DD&e_day=YYYY-MM-DD

    ``start_date`` and ``end_date`` are inclusive 'YYYY-MM-DD' strings; a
    malformed date raises ``ValueError``. Returns a list of dicts with keys
    ``plant_id``, ``date``, ``generation_kwh`` and ``current_kw`` (always 0).
    Periods that fail are reported and skipped.
    """
    results = []

    plant_id = plant_info.get('id', 'sunwms-06')
    plant_name = plant_info.get('name', '6호기')
    statics_url = _statics_url(plant_info)

    session = create_session()

    print(f"\n{'='*60}")
    print(f"[Sun-WMS Daily] {plant_name} ({start_date} ~ {end_date}) - Looping by Month")
    print(f"{'='*60}")

    if not _history_login(session, plant_info):
        return results

    start_dt = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')

    # Each request covers from loop_start to the end of that calendar month
    loop_start = start_dt
    while loop_start <= end_dt:
        last_day_of_month = calendar.monthrange(loop_start.year, loop_start.month)[1]
        # Clamp the final period to the requested end date
        loop_end = min(loop_start.replace(day=last_day_of_month), end_dt)

        s_str = loop_start.strftime('%Y-%m-%d')
        e_str = loop_end.strftime('%Y-%m-%d')

        print(f" [Fetching] {s_str} ~ {e_str} ...", end="", flush=True)

        params = {
            'tab01': '0',
            'tab02': '2',
            'tab03': '2',
            'tord': '2',
            's_day': s_str,
            'e_day': e_str
        }

        try:
            res = session.get(statics_url, params=params, headers=_HISTORY_HEADERS, timeout=15)
            res.encoding = 'euc-kr'

            if res.status_code != 200:
                print(f" HTTP {res.status_code}")
            else:
                rows = _extract_rows(res.text, _DAILY_ROW_RE)
                if rows is None:
                    print(" No tbody")
                elif not rows:
                    print(" No data")
                else:
                    results.extend(
                        {
                            'plant_id': plant_id,
                            'date': date_str,
                            'generation_kwh': safe_float(kwh),
                            'current_kw': 0
                        }
                        for date_str, kwh in rows
                    )
                    print(f" OK ({len(rows)} days)")
        except Exception as e:
            print(f" Error: {e}")

        # Next period starts the day after this one ended
        loop_start = loop_end + timedelta(days=1)

    print(f"\n[Total] Collected {len(results)} daily records\n")
    return results


def fetch_history_monthly(plant_info, start_month, end_month):
    """
    Collect monthly history for a Sun-WMS plant.

    Endpoint: /public/statics/statics.php (HTML table response)
    Monthly totals are computed by summing the daily rows of each month.

    ``start_month`` and ``end_month`` are inclusive 'YYYY-MM' strings; a
    malformed month raises ``ValueError``. Months before
    ``plant_info['start_date']`` (default '2019-12-30') are skipped. Returns
    a list of dicts with keys ``plant_id``, ``month`` and ``generation_kwh``.
    Months that fail or have no rows are reported and skipped.
    """
    results = []

    plant_id = plant_info.get('id', 'sunwms-06')
    plant_name = plant_info.get('name', '6호기')

    # Never ask for months before the plant started operating
    plant_start_date = plant_info.get('start_date') or '2019-12-30'
    plant_start_month = plant_start_date[:7]  # YYYY-MM

    # 'YYYY-MM' strings order chronologically under plain string comparison
    if start_month < plant_start_month:
        actual_start = plant_start_month
        print(f" ℹ 발전소 가동일({plant_start_date}) 이후부터 수집: {actual_start}")
    else:
        actual_start = start_month

    statics_url = _statics_url(plant_info)

    session = create_session()

    print(f"\n{'='*60}")
    print(f"[Sun-WMS Monthly] {plant_name} ({actual_start} ~ {end_month})")
    print(f"{'='*60}")

    if not _history_login(session, plant_info):
        return results

    # Walk the range one month at a time
    current_month = datetime.strptime(actual_start, '%Y-%m')
    end_month_dt = datetime.strptime(end_month, '%Y-%m')

    while current_month <= end_month_dt:
        month_str = current_month.strftime('%Y-%m')

        # First and last calendar day of this month
        first_day = current_month.strftime('%Y-%m-01')
        days_in_month = calendar.monthrange(current_month.year, current_month.month)[1]
        last_day = current_month.replace(day=days_in_month).strftime('%Y-%m-%d')

        # Fetch the month through the daily view and add the rows up
        params = {
            'tab01': '0',
            'tab02': '2',
            'tab03': '2',
            'tord': '2',
            's_day': first_day,
            'e_day': last_day
        }

        try:
            res = session.get(statics_url, params=params, headers=_HISTORY_HEADERS, timeout=10)
            res.encoding = 'euc-kr'

            if res.status_code != 200:
                print(f" ✗ HTTP {res.status_code} for {month_str}")
            else:
                rows = _extract_rows(res.text, _DAILY_ROW_RE)
                if rows is None:
                    print(f" ⚠ No tbody found for {month_str}")
                elif not rows:
                    print(f" ⚠ No data for {month_str}")
                else:
                    monthly_total = sum(safe_float(kwh) for _, kwh in rows)
                    results.append({
                        'plant_id': plant_id,
                        'month': month_str,
                        'generation_kwh': monthly_total
                    })
                    print(f" ✓ {month_str}: {monthly_total:.1f}kWh (from {len(rows)} days)")
        except Exception as e:
            print(f" ✗ Error for {month_str}: {e}")

        current_month = _first_of_next_month(current_month)

    print(f"[Total] Collected {len(results)} monthly records\n")
    return results