solorpower_crawler/crawlers/sun_wms.py

431 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# ==========================================
# crawlers/sun_wms.py - Sun-WMS 크롤러 (6호기)
# HTML 테이블 파싱 방식
# ==========================================
import requests
import re
import time
from .base import create_session, safe_float
def fetch_data(plant_info):
    """
    Collect the current readings of a Sun-WMS plant.

    Logs in with the configured credentials, downloads the data page and
    extracts the ``value`` attributes of the ``cur_power`` and
    ``today_power`` elements from the returned HTML.

    Args:
        plant_info: Plant configuration dict. Reads ``id``, ``name``,
            ``company_name``, ``auth`` (``payload_id``, ``payload_pw``) and
            ``system`` (``login_url``, ``data_url``).

    Returns:
        A one-element list holding a dict with the keys ``id``, ``name``,
        ``kw``, ``today`` and ``status``. An empty list is returned when the
        login or the data request fails, or when a value cannot be parsed.
    """
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    company_name = plant_info.get('company_name', '태양과바람')
    plant_name = plant_info.get('name', '6호기')
    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')
    data_url = system.get('data_url', '')

    session = create_session()
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
        'Referer': 'http://tb6.sun-wms.com/public/main/login.php',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }

    # 1. Log in
    login_data = {
        'act': 'loginChk',
        'user_id': payload_id,
        'user_pass': payload_pw
    }
    try:
        # Explicit timeout: without one a stalled server blocks the crawler
        # indefinitely.
        res = session.post(login_url, data=login_data, headers=headers, timeout=10)
        if res.status_code != 200:
            return []
    except Exception as e:
        print(f"{plant_name} 접속 에러: {e}")
        return []

    # 2. Request the data page
    try:
        # Millisecond timestamp appended as a query parameter.
        timestamp = int(time.time() * 1000)
        res = session.get(f"{data_url}?time={timestamp}", headers=headers, timeout=10)
        if res.status_code != 200:
            # Do not parse an error page: it would be reported as an idle
            # plant with 0 kW instead of a failed collection.
            print(f"{plant_name} 에러: HTTP {res.status_code}")
            return []
        res.encoding = 'euc-kr'
        content = res.text

        # NOTE: no re.DOTALL, so the id and value attributes must sit on the
        # same line of the HTML for these patterns to match.
        match_kw = re.search(r"id=['\"]cur_power['\"].*?value=['\"]([^'\"]+)['\"]", content)
        curr_kw = float(match_kw.group(1)) if match_kw else 0.0
        match_today = re.search(r"id=['\"]today_power['\"].*?value=['\"]([^'\"]+)['\"]", content)
        today_kwh = float(match_today.group(1)) if match_today else 0.0

        status = "🟢 정상" if curr_kw > 0 else "💤 대기"
        return [{
            'id': plant_id,
            'name': f'{company_name} {plant_name}',
            'kw': curr_kw,
            'today': today_kwh,
            'status': status
        }]
    except Exception as e:
        print(f"{plant_name} 에러: {e}")
        return []
def fetch_history_hourly(plant_info, start_date, end_date):
    """
    Collect hourly history records of a Sun-WMS plant.

    Endpoint: ``/public/statics/statics.php`` (responds with an HTML table).
    Query parameters: ``tab01=0&tab02=1&tab03=2&tord=1&s_day=YYYY-MM-DD``.
    One request is issued per day of the inclusive range.

    Args:
        plant_info: Plant configuration dict. Reads ``id``, ``name``,
            ``auth`` (``payload_id``, ``payload_pw``) and ``system``
            (``login_url``, ``base_url``, optional ``statics_url``).
        start_date: First day to fetch, formatted ``YYYY-MM-DD``.
        end_date: Last day to fetch (inclusive), formatted ``YYYY-MM-DD``.

    Returns:
        A list of dicts with the keys ``plant_id``, ``timestamp``
        (``YYYY-MM-DD HH:00:00``), ``generation_kwh`` and ``current_kw``.
        The list is empty when the login fails.

    Raises:
        ValueError: If ``start_date`` or ``end_date`` is not ``YYYY-MM-DD``.
    """
    from datetime import datetime, timedelta

    results = []
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '6호기')
    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')
    # The statistics URL falls back to a path built from base_url.
    base_url = system.get('base_url', '')
    statics_url = system.get('statics_url', f"{base_url}/public/statics/statics.php")

    session = create_session()
    print(f"\n{'='*60}")
    print(f"[Sun-WMS History] {plant_name} ({start_date} ~ {end_date})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }
    login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw}
    try:
        # Same timeout as the data requests below, so a stalled login cannot
        # block the run indefinitely.
        res = session.post(login_url, data=login_data, headers=headers, timeout=10)
        if res.status_code == 200:
            print(" ✓ Login successful")
        else:
            print(" ✗ Login failed")
            return results
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    # Compiled once; the patterns do not change between days.
    tbody_re = re.compile(r'<tbody>(.*?)</tbody>', re.DOTALL)
    row_re = re.compile(r'<tr>\s*<td>(\d{2}):00</td>\s*<td>([\d.]+)</td>\s*</tr>')

    # Iterate over every day of the inclusive range.
    start_dt = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')
    for offset in range((end_dt - start_dt).days + 1):
        date_str = (start_dt + timedelta(days=offset)).strftime('%Y-%m-%d')
        # Query parameters of the hourly statistics view.
        params = {
            'tab01': '0',
            'tab02': '1',
            'tab03': '2',
            'tord': '1',
            's_day': date_str
        }
        try:
            res = session.get(statics_url, params=params, headers=headers, timeout=10)
            res.encoding = 'euc-kr'
            if res.status_code != 200:
                print(f" ✗ HTTP {res.status_code}")
                continue

            # The rows of interest live inside the first <tbody>.
            tbody_match = tbody_re.search(res.text)
            if not tbody_match:
                print(f" ⚠ No tbody found for {date_str}")
                continue

            # Each row is expected to be "<td>HH:00</td><td>value</td>".
            matches = row_re.findall(tbody_match.group(1))
            if not matches:
                print(f" ⚠ No data for {date_str}")
                continue

            print(f" ✓ Found {len(matches)} hourly records")
            for hour, kwh in matches:
                results.append({
                    'plant_id': plant_id,
                    'timestamp': f"{date_str} {hour}:00:00",
                    'generation_kwh': safe_float(kwh),
                    # Only the energy column is parsed; this is always 0 here.
                    'current_kw': 0
                })
        except Exception as e:
            # Best effort: a failing day is reported and skipped.
            print(f" ✗ Error: {e}")

    print(f"\n{'='*60}")
    print(f"[Total] Collected {len(results)} hourly records")
    print(f"{'='*60}\n")
    return results
def fetch_history_daily(plant_info, start_date, end_date):
    """
    Collect daily history records of a Sun-WMS plant, one request per month.

    Endpoint: ``/public/statics/statics.php`` (responds with an HTML table).
    Query parameters:
    ``tab01=0&tab02=2&tab03=2&tord=2&s_day=YYYY-MM-DD&e_day=YYYY-MM-DD``.
    The requested range is split at month boundaries and each piece is
    fetched separately.

    Args:
        plant_info: Plant configuration dict. Reads ``id``, ``name``,
            ``auth`` (``payload_id``, ``payload_pw``) and ``system``
            (``login_url``, ``base_url``, optional ``statics_url``).
        start_date: First day to fetch, formatted ``YYYY-MM-DD``.
        end_date: Last day to fetch (inclusive), formatted ``YYYY-MM-DD``.

    Returns:
        A list of dicts with the keys ``plant_id``, ``date``,
        ``generation_kwh`` and ``current_kw``. The list is empty when the
        login fails.

    Raises:
        ValueError: If ``start_date`` or ``end_date`` is not ``YYYY-MM-DD``.
    """
    # Only stdlib helpers are imported here; `re`, `safe_float` and
    # `create_session` come from the module-level imports.
    from datetime import datetime, timedelta
    import calendar

    results = []
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '6호기')
    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')
    base_url = system.get('base_url', '')
    statics_url = system.get('statics_url', f"{base_url}/public/statics/statics.php")

    session = create_session()
    print(f"\n{'='*60}")
    print(f"[Sun-WMS Daily] {plant_name} ({start_date} ~ {end_date}) - Looping by Month")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }

    # Log in
    try:
        login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw}
        # Explicit timeout so a stalled login cannot block the run forever.
        res = session.post(login_url, data=login_data, headers=headers, timeout=10)
        if res.status_code == 200:
            print(" ✓ Login successful")
        else:
            print(" ✗ Login failed")
            return results
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    # Compiled once; the patterns do not change between requests.
    tbody_re = re.compile(r'<tbody>(.*?)</tbody>', re.DOTALL)
    row_re = re.compile(r'<tr>\s*<td>(\d{4}-\d{2}-\d{2})</td>\s*<td>([\d.]+)</td>')

    # Walk the range month by month.
    start_dt = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')
    cursor = start_dt
    while cursor <= end_dt:
        chunk_start = cursor
        # Last calendar day of the chunk's month, clamped to the overall end.
        last_day_of_month = calendar.monthrange(chunk_start.year, chunk_start.month)[1]
        chunk_end = min(chunk_start.replace(day=last_day_of_month), end_dt)
        # Advance up front so every `continue` below still makes progress.
        cursor = chunk_end + timedelta(days=1)

        s_str = chunk_start.strftime('%Y-%m-%d')
        e_str = chunk_end.strftime('%Y-%m-%d')
        print(f" [Fetching] {s_str} ~ {e_str} ...", end="", flush=True)
        params = {
            'tab01': '0',
            'tab02': '2',
            'tab03': '2',
            'tord': '2',
            's_day': s_str,
            'e_day': e_str
        }
        try:
            res = session.get(statics_url, params=params, headers=headers, timeout=15)
            res.encoding = 'euc-kr'
            if res.status_code != 200:
                print(f" HTTP {res.status_code}")
                continue

            tbody_match = tbody_re.search(res.text)
            if not tbody_match:
                print(" No tbody")
                continue

            # Each row is expected to start with "<td>date</td><td>value</td>".
            matches = row_re.findall(tbody_match.group(1))
            if not matches:
                print(" No data")
                continue

            results.extend(
                {
                    'plant_id': plant_id,
                    'date': date_str,
                    'generation_kwh': safe_float(kwh),
                    # Only the energy column is parsed; this is always 0 here.
                    'current_kw': 0
                }
                for date_str, kwh in matches
            )
            print(f" OK ({len(matches)} days)")
        except Exception as e:
            # Best effort: a failing month is reported and skipped.
            print(f" Error: {e}")

    print(f"\n[Total] Collected {len(results)} daily records\n")
    return results
def fetch_history_monthly(plant_info, start_month, end_month):
    """
    Collect monthly history records of a Sun-WMS plant.

    Endpoint: ``/public/statics/statics.php`` (responds with an HTML table).
    Monthly figures are produced by requesting the daily table for each
    month and summing its rows.

    Args:
        plant_info: Plant configuration dict. Reads ``id``, ``name``,
            ``start_date`` (``YYYY-MM-DD``, default ``2019-12-30``),
            ``auth`` (``payload_id``, ``payload_pw``) and ``system``
            (``login_url``, ``base_url``, optional ``statics_url``).
        start_month: First month to fetch, formatted ``YYYY-MM``. Months
            before the plant's ``start_date`` are skipped.
        end_month: Last month to fetch (inclusive), formatted ``YYYY-MM``.

    Returns:
        A list of dicts with the keys ``plant_id``, ``month`` and
        ``generation_kwh``; months without parsable rows are omitted. The
        list is empty when the login fails.

    Raises:
        ValueError: If the effective start month or ``end_month`` is not
            ``YYYY-MM``.
    """
    from datetime import datetime
    from dateutil.relativedelta import relativedelta

    results = []
    plant_id = plant_info.get('id', 'sunwms-06')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '6호기')

    # Never request months before the plant's start date. Zero-padded
    # YYYY-MM strings compare correctly as plain strings.
    plant_start_date = plant_info.get('start_date', '2019-12-30')
    plant_start_month = plant_start_date[:7]  # YYYY-MM
    if start_month < plant_start_month:
        actual_start = plant_start_month
        print(f" 발전소 가동일({plant_start_date}) 이후부터 수집: {actual_start}")
    else:
        actual_start = start_month

    payload_id = auth.get('payload_id', '')
    payload_pw = auth.get('payload_pw', '')
    login_url = system.get('login_url', '')
    # The statistics URL falls back to a path built from base_url.
    base_url = system.get('base_url', '')
    statics_url = system.get('statics_url', f"{base_url}/public/statics/statics.php")

    session = create_session()
    print(f"\n{'='*60}")
    print(f"[Sun-WMS Monthly] {plant_name} ({actual_start} ~ {end_month})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }
    login_data = {'act': 'loginChk', 'user_id': payload_id, 'user_pass': payload_pw}
    try:
        # Same timeout as the data requests below, so a stalled login cannot
        # block the run indefinitely.
        res = session.post(login_url, data=login_data, headers=headers, timeout=10)
        if res.status_code == 200:
            print(" ✓ Login successful")
        else:
            print(" ✗ Login failed")
            return results
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return results

    # Compiled once; the patterns do not change between months.
    tbody_re = re.compile(r'<tbody>(.*?)</tbody>', re.DOTALL)
    row_re = re.compile(r'<tr>\s*<td>(\d{4}-\d{2}-\d{2})</td>\s*<td>([\d.]+)</td>')

    # Iterate month by month; strptime('%Y-%m') yields the 1st of the month.
    cursor = datetime.strptime(actual_start, '%Y-%m')
    end_month_dt = datetime.strptime(end_month, '%Y-%m')
    while cursor <= end_month_dt:
        current_month = cursor
        # Advance up front so every `continue` below still makes progress.
        cursor = current_month + relativedelta(months=1)

        month_str = current_month.strftime('%Y-%m')
        # First and last day of the month. The month arithmetic rolls over
        # the year, so December needs no special case.
        first_day = current_month.strftime('%Y-%m-01')
        last_day = (cursor - relativedelta(days=1)).strftime('%Y-%m-%d')

        # Fetch one month from the daily view and sum it up.
        params = {
            'tab01': '0',
            'tab02': '2',
            'tab03': '2',
            'tord': '2',
            's_day': first_day,
            'e_day': last_day
        }
        try:
            res = session.get(statics_url, params=params, headers=headers, timeout=10)
            res.encoding = 'euc-kr'
            if res.status_code != 200:
                # Report instead of skipping silently, so a missing month
                # can be traced in the output.
                print(f" ✗ HTTP {res.status_code} for {month_str}")
                continue

            # The rows of interest live inside the first <tbody>.
            tbody_match = tbody_re.search(res.text)
            if not tbody_match:
                print(f" ⚠ No tbody found for {month_str}")
                continue

            # Each row is expected to start with "<td>date</td><td>value</td>".
            matches = row_re.findall(tbody_match.group(1))
            if not matches:
                print(f" ⚠ No data for {month_str}")
                continue

            # Sum the daily values into the monthly total.
            monthly_total = sum(safe_float(kwh) for _, kwh in matches)
            results.append({
                'plant_id': plant_id,
                'month': month_str,
                'generation_kwh': monthly_total
            })
            print(f"{month_str}: {monthly_total:.1f}kWh (from {len(matches)} days)")
        except Exception as e:
            # Best effort: a failing month is reported and skipped.
            print(f" ✗ Error for {month_str}: {e}")

    print(f"[Total] Collected {len(results)} monthly records\n")
    return results