431 lines
15 KiB
Python
431 lines
15 KiB
Python
# ==========================================
|
||
# crawlers/sun_wms.py - Sun-WMS 크롤러 (6호기)
|
||
# HTML 테이블 파싱 방식
|
||
# ==========================================
|
||
|
||
import requests
|
||
import re
|
||
import time
|
||
from .base import create_session, safe_float
|
||
|
||
def fetch_data(plant_info):
    """Collect the current readings of a Sun-WMS plant.

    Logs in with the credentials found in ``plant_info['auth']``, requests
    ``plant_info['system']['data_url']`` and reads the ``value`` attributes of
    the elements whose ids are ``cur_power`` and ``today_power``.

    Args:
        plant_info: Plant configuration dict. Keys read: ``id``, ``name``,
            ``company_name``, ``auth`` (``payload_id``, ``payload_pw``) and
            ``system`` (``login_url``, ``data_url``).

    Returns:
        A one-element list containing a dict with the keys ``id``, ``name``,
        ``kw``, ``today`` and ``status``. An empty list is returned when the
        login response is not HTTP 200 or when any request or parsing step
        raises.
    """
    # Seconds to wait on each HTTP call; without a timeout a stalled server
    # would block the crawler indefinitely.
    request_timeout = 10

    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    company_name = plant_info.get('company_name', '태양과바람')
    plant_name = plant_info.get('name', '6호기')

    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')
    data_url = system.get('data_url', '')

    session = create_session()

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
        'Referer': 'http://tb6.sun-wms.com/public/main/login.php',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    }

    # 1. Log in
    login_data = {
        'act': 'loginChk',
        'user_id': payload_id,
        'user_pass': payload_pw,
    }

    try:
        res = session.post(
            login_url,
            data=login_data,
            headers=headers,
            timeout=request_timeout,
        )
        if res.status_code != 200:
            return []
    except Exception as e:
        print(f"❌ {plant_name} 접속 에러: {e}")
        return []

    # 2. Request the data page
    try:
        # Millisecond timestamp appended as a query parameter
        # (presumably a cache-buster -- confirm against the site).
        timestamp = int(time.time() * 1000)
        res = session.get(
            f"{data_url}?time={timestamp}",
            headers=headers,
            timeout=request_timeout,
        )
        res.encoding = 'euc-kr'

        content = res.text

        match_kw = re.search(r"id=['\"]cur_power['\"].*?value=['\"]([^'\"]+)['\"]", content)
        curr_kw = float(match_kw.group(1)) if match_kw else 0.0

        match_today = re.search(r"id=['\"]today_power['\"].*?value=['\"]([^'\"]+)['\"]", content)
        today_kwh = float(match_today.group(1)) if match_today else 0.0

        # Any positive instantaneous output counts as "normal"; otherwise "standby".
        status = "🟢 정상" if curr_kw > 0 else "💤 대기"

        return [{
            'id': plant_id,
            'name': f'{company_name} {plant_name}',
            'kw': curr_kw,
            'today': today_kwh,
            'status': status,
        }]

    except Exception as e:
        # Best-effort crawler boundary: report the failure and return no data.
        print(f"❌ {plant_name} 에러: {e}")
        return []
|
||
|
||
|
||
def fetch_history_hourly(plant_info, start_date, end_date):
    """Collect hourly historical generation data of a Sun-WMS plant.

    Endpoint: /public/statics/statics.php (HTML table response)
    Query parameters: tab01=0&tab02=1&tab03=2&tord=1&s_day=YYYY-MM-DD

    One request is issued per calendar day between ``start_date`` and
    ``end_date`` (both inclusive).

    Args:
        plant_info: Plant configuration dict. Keys read: ``id``, ``name``,
            ``auth`` (``payload_id``, ``payload_pw``) and ``system``
            (``login_url``, ``base_url``, ``statics_url``).
        start_date: First day to fetch, formatted ``YYYY-MM-DD``.
        end_date: Last day to fetch, formatted ``YYYY-MM-DD``.

    Returns:
        A list of dicts with the keys ``plant_id``, ``timestamp``
        (``YYYY-MM-DD HH:00:00``), ``generation_kwh`` and ``current_kw``
        (always 0). The list is empty when the login fails.

    Raises:
        ValueError: If ``start_date`` or ``end_date`` does not match
            ``%Y-%m-%d`` (raised by ``datetime.strptime``).
    """
    from datetime import datetime, timedelta

    results = []
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '6호기')

    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')

    # An explicit statics_url wins; otherwise derive it from base_url.
    base_url = system.get('base_url', '')
    statics_url = system.get('statics_url', f"{base_url}/public/statics/statics.php")

    session = create_session()

    print(f"\n{'='*60}")
    print(f"[Sun-WMS History] {plant_name} ({start_date} ~ {end_date})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    }

    login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw}

    try:
        # The timeout keeps a stalled login from hanging the whole run.
        res = session.post(login_url, data=login_data, headers=headers, timeout=10)
        if res.status_code != 200:
            print(" ✗ Login failed")
            return results
        print(" ✓ Login successful")
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    # Loop-invariant patterns, compiled once.
    tbody_re = re.compile(r'<tbody>(.*?)</tbody>', re.DOTALL)
    row_re = re.compile(r'<tr>\s*<td>(\d{2}):00</td>\s*<td>([\d.]+)</td>\s*</tr>')

    first_day = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')

    # range() is empty when end_date precedes start_date, so nothing is fetched.
    for offset in range((end_dt - first_day).days + 1):
        date_str = (first_day + timedelta(days=offset)).strftime('%Y-%m-%d')

        # Parameters of the hourly statistics view.
        params = {
            'tab01': '0',
            'tab02': '1',
            'tab03': '2',
            'tord': '1',
            's_day': date_str,
        }

        try:
            res = session.get(statics_url, params=params, headers=headers, timeout=10)
            res.encoding = 'euc-kr'

            if res.status_code != 200:
                print(f" ✗ HTTP {res.status_code}")
                continue

            # The rows of interest live inside the first <tbody>.
            tbody_match = tbody_re.search(res.text)
            if not tbody_match:
                print(f" ⚠ No tbody found for {date_str}")
                continue

            matches = row_re.findall(tbody_match.group(1))
            if not matches:
                print(f" ⚠ No data for {date_str}")
                continue

            print(f" ✓ Found {len(matches)} hourly records")

            for hour, kwh in matches:
                results.append({
                    'plant_id': plant_id,
                    'timestamp': f"{date_str} {hour}:00:00",
                    'generation_kwh': safe_float(kwh),
                    'current_kw': 0,
                })

        except Exception as e:
            # A failed day is reported and skipped; the remaining days still run.
            print(f" ✗ Error: {e}")

    print(f"\n{'='*60}")
    print(f"[Total] Collected {len(results)} hourly records")
    print(f"{'='*60}\n")

    return results
|
||
|
||
|
||
def fetch_history_daily(plant_info, start_date, end_date):
    """Collect daily historical generation data of a Sun-WMS plant.

    The requested range is split into per-calendar-month requests.

    Endpoint: /public/statics/statics.php (HTML table response)
    Query parameters:
        tab01=0&tab02=2&tab03=2&tord=2&s_day=YYYY-MM-DD&e_day=YYYY-MM-DD

    Args:
        plant_info: Plant configuration dict. Keys read: ``id``, ``name``,
            ``auth`` (``payload_id``, ``payload_pw``) and ``system``
            (``login_url``, ``base_url``, ``statics_url``).
        start_date: First day to fetch, formatted ``YYYY-MM-DD``.
        end_date: Last day to fetch, formatted ``YYYY-MM-DD``.

    Returns:
        A list of dicts with the keys ``plant_id``, ``date``,
        ``generation_kwh`` and ``current_kw`` (always 0). The list is empty
        when the login fails.

    Raises:
        ValueError: If ``start_date`` or ``end_date`` does not match
            ``%Y-%m-%d`` (raised by ``datetime.strptime``).
    """
    import calendar
    from datetime import datetime, timedelta

    results = []
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '6호기')

    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')
    base_url = system.get('base_url', '')
    statics_url = system.get('statics_url', f"{base_url}/public/statics/statics.php")

    session = create_session()

    print(f"\n{'='*60}")
    print(f"[Sun-WMS Daily] {plant_name} ({start_date} ~ {end_date}) - Looping by Month")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    }

    # Log in
    try:
        login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw}
        # The timeout keeps a stalled login from hanging the whole run.
        res = session.post(login_url, data=login_data, headers=headers, timeout=15)
        if res.status_code != 200:
            print(" ✗ Login failed")
            return results
        print(" ✓ Login successful")
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    start_dt = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')

    # Split [start_dt, end_dt] into chunks that never cross a month boundary.
    periods = []
    cursor = start_dt
    while cursor <= end_dt:
        days_in_month = calendar.monthrange(cursor.year, cursor.month)[1]
        # Clamp the final chunk to the overall end date.
        chunk_end = min(cursor.replace(day=days_in_month), end_dt)
        periods.append((cursor.strftime('%Y-%m-%d'), chunk_end.strftime('%Y-%m-%d')))
        cursor = chunk_end + timedelta(days=1)

    # Loop-invariant patterns, compiled once.
    tbody_re = re.compile(r'<tbody>(.*?)</tbody>', re.DOTALL)
    row_re = re.compile(r'<tr>\s*<td>(\d{4}-\d{2}-\d{2})</td>\s*<td>([\d.]+)</td>')

    for s_str, e_str in periods:
        # No newline here: the outcome is printed on the same line below.
        print(f" [Fetching] {s_str} ~ {e_str} ...", end="", flush=True)

        params = {
            'tab01': '0',
            'tab02': '2',
            'tab03': '2',
            'tord': '2',
            's_day': s_str,
            'e_day': e_str,
        }

        try:
            res = session.get(statics_url, params=params, headers=headers, timeout=15)
            res.encoding = 'euc-kr'

            if res.status_code != 200:
                print(f" HTTP {res.status_code}")
                continue

            tbody_match = tbody_re.search(res.text)
            if not tbody_match:
                print(" No tbody")
                continue

            matches = row_re.findall(tbody_match.group(1))
            if not matches:
                print(" No data")
                continue

            for date_str, kwh in matches:
                results.append({
                    'plant_id': plant_id,
                    'date': date_str,
                    'generation_kwh': safe_float(kwh),
                    'current_kw': 0,
                })
            print(f" OK ({len(matches)} days)")

        except Exception as e:
            # A failed chunk is reported and skipped; later chunks still run.
            print(f" Error: {e}")

    print(f"\n[Total] Collected {len(results)} daily records\n")
    return results
|
||
|
||
|
||
def fetch_history_monthly(plant_info, start_month, end_month):
    """Collect monthly historical generation data of a Sun-WMS plant.

    Endpoint: /public/statics/statics.php (HTML table response)

    Monthly values are not requested directly: for every month the daily
    view is fetched for the whole month and the daily values are summed.

    Args:
        plant_info: Plant configuration dict. Keys read: ``id``, ``name``,
            ``start_date`` (``YYYY-MM-DD``, default ``2019-12-30``), ``auth``
            (``payload_id``, ``payload_pw``) and ``system`` (``login_url``,
            ``base_url``, ``statics_url``).
        start_month: First month to fetch, formatted ``YYYY-MM``. Months
            before the month of ``plant_info['start_date']`` are skipped.
        end_month: Last month to fetch, formatted ``YYYY-MM``.

    Returns:
        A list of dicts with the keys ``plant_id``, ``month`` (``YYYY-MM``)
        and ``generation_kwh``; months without parsable rows are omitted.
        The list is empty when the login fails.

    Raises:
        ValueError: If the effective start month or ``end_month`` does not
            match ``%Y-%m`` (raised by ``datetime.strptime``).
    """
    from datetime import datetime
    from dateutil.relativedelta import relativedelta

    results = []
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '6호기')

    # Never start before the month the plant went into operation.
    plant_start_date = plant_info.get('start_date', '2019-12-30')
    plant_start_month = plant_start_date[:7]  # YYYY-MM

    # Plain string comparison is sufficient for zero-padded YYYY-MM values.
    if start_month < plant_start_month:
        actual_start = plant_start_month
        print(f" ℹ 발전소 가동일({plant_start_date}) 이후부터 수집: {actual_start}")
    else:
        actual_start = start_month

    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')

    # An explicit statics_url wins; otherwise derive it from base_url.
    base_url = system.get('base_url', '')
    statics_url = system.get('statics_url', f"{base_url}/public/statics/statics.php")

    session = create_session()

    print(f"\n{'='*60}")
    print(f"[Sun-WMS Monthly] {plant_name} ({actual_start} ~ {end_month})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    }

    login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw}

    try:
        # The timeout keeps a stalled login from hanging the whole run.
        res = session.post(login_url, data=login_data, headers=headers, timeout=10)
        if res.status_code != 200:
            print(" ✗ Login failed")
            return results
        print(" ✓ Login successful")
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    # Loop-invariant patterns, compiled once.
    tbody_re = re.compile(r'<tbody>(.*?)</tbody>', re.DOTALL)
    row_re = re.compile(r'<tr>\s*<td>(\d{4}-\d{2}-\d{2})</td>\s*<td>([\d.]+)</td>')

    current_month = datetime.strptime(actual_start, '%Y-%m')
    end_month_dt = datetime.strptime(end_month, '%Y-%m')

    while current_month <= end_month_dt:
        month_str = current_month.strftime('%Y-%m')

        # First and last day of the month. "First of next month minus one
        # day" also covers December, so no special case is needed.
        first_day = current_month.strftime('%Y-%m-01')
        next_month = current_month + relativedelta(months=1)
        last_day = (next_month - relativedelta(days=1)).strftime('%Y-%m-%d')

        # Advance now so the `continue` statements below cannot skip it.
        current_month = next_month

        # Daily view for the whole month; the rows are summed below.
        params = {
            'tab01': '0',
            'tab02': '2',
            'tab03': '2',
            'tord': '2',
            's_day': first_day,
            'e_day': last_day,
        }

        try:
            res = session.get(statics_url, params=params, headers=headers, timeout=10)
            res.encoding = 'euc-kr'

            if res.status_code != 200:
                # Reported instead of silently skipped, as in the hourly and
                # daily collectors.
                print(f" ✗ HTTP {res.status_code} for {month_str}")
                continue

            tbody_match = tbody_re.search(res.text)
            if not tbody_match:
                print(f" ⚠ No tbody found for {month_str}")
                continue

            matches = row_re.findall(tbody_match.group(1))
            if not matches:
                print(f" ⚠ No data for {month_str}")
                continue

            # Aggregate the daily values into one monthly total.
            monthly_total = sum(safe_float(kwh) for _, kwh in matches)

            results.append({
                'plant_id': plant_id,
                'month': month_str,
                'generation_kwh': monthly_total,
            })
            print(f" ✓ {month_str}: {monthly_total:.1f}kWh (from {len(matches)} days)")

        except Exception as e:
            # A failed month is reported and skipped; later months still run.
            print(f" ✗ Error for {month_str}: {e}")

    print(f"[Total] Collected {len(results)} monthly records\n")
    return results
|