solorpower_crawler/crawlers/kremc.py

560 lines
20 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# ==========================================
# crawlers/kremc.py - KREMC 크롤러 (5호기)
# ==========================================
import requests
import urllib.parse
from .base import safe_float, create_session
def fetch_data(plant_info):
    """Collect the current output and today's energy for one KREMC plant.

    Logs in with the credentials found in ``plant_info['auth']`` and then
    queries the ``gath/latest`` and ``gath/energy`` monitoring endpoints
    below ``plant_info['system']['api_base']``.

    Args:
        plant_info: dict that may contain ``id``, ``name``, ``company_name``,
            ``auth`` (``user_id``, ``password``) and ``system``
            (``login_url``, ``api_base``, ``enso_type``). Missing keys fall
            back to the defaults hard-coded below.

    Returns:
        A one-element list holding a dict with the keys ``id``, ``name``,
        ``kw``, ``today`` and ``status``; or an empty list when the login
        fails or an unexpected error occurs.
    """
    # Configuration, with the module's built-in defaults.
    plant_id = plant_info.get('id', 'kremc-05')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    company_name = plant_info.get('company_name', '태양과바람')
    plant_name = plant_info.get('name', '5호기')
    user_id = auth.get('user_id', '')
    password = auth.get('password', '')
    login_url = system.get('login_url', '')
    api_base = system.get('api_base', '')
    enso_type = system.get('enso_type', '15001')
    user_agent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'Chrome/120.0.0.0 Safari/537.36'
    )

    try:
        session = create_session()
        headers = {
            'User-Agent': user_agent,
            'Content-Type': 'application/json',
            'Accept': 'application/json, text/plain, */*',
            'Origin': 'https://kremc.kr',
            'Referer': 'https://kremc.kr/login',
        }

        # 1. Log in.
        login_data = {'userId': user_id, 'password': password}
        login_res = session.post(login_url, json=login_data, headers=headers, timeout=10)
        if login_res.status_code != 200:
            print(f" ⚠️ KREMC 로그인 실패: {login_res.status_code}")
            return []

        try:
            login_json = login_res.json()
        except ValueError:
            # JSON decoding errors are ValueError subclasses; a bare
            # ``except`` here would also trap KeyboardInterrupt/SystemExit.
            print(" ⚠️ KREMC 로그인 실패: 응답이 JSON이 아님")
            return []
        if not isinstance(login_json, dict):
            print(" ⚠️ KREMC 로그인 실패: 응답 형식 오류")
            return []

        login_ok = login_json.get('status') == 200 or login_json.get('code') == 'S001'
        if not login_ok:
            print(f" ⚠️ KREMC 로그인 실패: {login_json.get('message', 'Unknown')}")
            return []

        # The token is either the ``data`` string itself or one of several
        # keys inside a ``data`` object.
        data = login_json.get('data')
        token = None
        if isinstance(data, str) and len(data) > 10:
            token = data
        elif isinstance(data, dict):
            token = data.get('token') or data.get('accessToken') or data.get('jwt')
        if not token:
            print(" ⚠️ KREMC 로그인 실패: 토큰 없음")
            return []
        print(f" [KREMC] 토큰 획득 성공")

        # 2. Headers for the authenticated API calls.
        api_headers = {
            'User-Agent': user_agent,
            'Accept': 'application/json',
            'X-Auth-Token': token,
        }
        installer_id_encoded = urllib.parse.quote(user_id)

        # 3. Current output (kW). Best effort: a bad payload leaves 0.0.
        latest_url = f"{api_base}/monitor/installer/gath/latest?installerId={installer_id_encoded}&ensoTypeCode={enso_type}"
        latest_res = session.get(latest_url, headers=api_headers, timeout=10)
        current_kw = 0.0
        if latest_res.status_code == 200:
            try:
                latest_payload = latest_res.json().get('data', {})
                if isinstance(latest_payload, dict):
                    # The raw figure is divided by 1000 to obtain kW.
                    watts = safe_float(latest_payload.get('outpElcpFigr', 0))
                    current_kw = watts / 1000.0 if watts > 0 else 0.0
            except Exception as exc:
                # Keep going with 0.0, but do not hide the failure.
                print(f" ⚠️ KREMC 실시간 데이터 파싱 실패: {exc}")

        # 4. Energy generated today (kWh). Best effort as well.
        energy_url = f"{api_base}/monitor/installer/gath/energy?installerId={installer_id_encoded}&ensoTypeCode={enso_type}&cid="
        energy_res = session.get(energy_url, headers=api_headers, timeout=10)
        today_kwh = 0.0
        if energy_res.status_code == 200:
            try:
                energy_payload = energy_res.json().get('data', {})
                if isinstance(energy_payload, dict):
                    today_kwh = safe_float(energy_payload.get('dayEnergy', 0))
            except Exception as exc:
                print(f" ⚠️ KREMC 일일 발전량 파싱 실패: {exc}")

        print(f" [KREMC] {plant_name} 데이터: {current_kw} kW / {today_kwh} kWh")
        return [{
            'id': plant_id,
            'name': f'{company_name} {plant_name}',
            'kw': current_kw,
            'today': today_kwh,
            'status': '🟢 정상' if current_kw > 0 else '💤 대기'
        }]
    except Exception as e:
        # Top-level boundary: any failure is reported and yields no data.
        print(f" ❌ KREMC 오류: {e}")
        return []
def fetch_history_hourly(plant_info, start_date, end_date):
    """Collect hourly historical generation data for a KREMC plant.

    Logs in once, then requests the ``meainDataList`` statistics endpoint
    with ``dateType='HH'`` for every day from ``start_date`` to ``end_date``
    (both inclusive).

    Args:
        plant_info: dict with plant configuration (``id``, ``name``,
            ``auth``, ``system``, ``options``).
        start_date: str, first day to fetch (YYYY-MM-DD).
        end_date: str, last day to fetch (YYYY-MM-DD).

    Returns:
        list of dicts with the keys ``plant_id``, ``timestamp``
        (``YYYY-MM-DD HH:00:00``), ``generation_kwh`` and ``current_kw``
        (always 0). Empty when login fails; partial when an error interrupts
        the date loop.
    """
    from datetime import datetime, timedelta

    results = []

    # Configuration.
    plant_id = plant_info.get('id', 'kremc-05')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    options = plant_info.get('options', {})
    plant_name = plant_info.get('name', '5호기')
    user_id = auth.get('user_id', '')
    password = auth.get('password', '')
    login_url = system.get('login_url', '')
    api_base = system.get('api_base', '')
    enso_type = system.get('enso_type', '15001')

    # Extra query parameters required by the KREMC statistics endpoint.
    cid = options.get('cid', '10013000376')
    city_prov_code = options.get('cityProvCode', '11')
    rgn_code = options.get('rgnCode', '11410')
    dong_code = options.get('dongCode', '1141011700')

    session = create_session()
    print(f"\n{'='*60}")
    print(f"[KREMC History] {plant_name} ({start_date} ~ {end_date})")
    print(f"{'='*60}")

    # Login request headers.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
        'Content-Type': 'application/json',
        'Accept': 'application/json, text/plain, */*',
        'Origin': 'https://kremc.kr',
        'Referer': 'https://kremc.kr/login'
    }
    try:
        login_data = {'userId': user_id, 'password': password}
        login_res = session.post(login_url, json=login_data, headers=headers, timeout=10)
        if login_res.status_code != 200:
            print(f" ✗ Login failed: {login_res.status_code}")
            return results
        login_json = login_res.json()
        if not (login_json.get('status') == 200 or login_json.get('code') == 'S001'):
            print(f" ✗ Login failed: {login_json.get('message', 'Unknown')}")
            return results

        # The token is either the ``data`` string itself or a key inside it.
        data = login_json.get('data')
        if isinstance(data, str) and len(data) > 10:
            token = data
        elif isinstance(data, dict):
            token = data.get('token') or data.get('accessToken') or data.get('jwt')
            if not token:
                print(f" ✗ Token not found")
                return results
        else:
            print(f" ✗ Invalid login data")
            return results
        print(f" ✓ Login successful")

        # Headers for the authenticated API calls.
        api_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'application/json',
            'X-Auth-Token': token
        }

        # The endpoint does not depend on the date, so build it once.
        hourly_url = f"{api_base}/stat/userbyuser/meainDataList"

        # Walk the date range one day at a time.
        current_date = datetime.strptime(start_date, '%Y-%m-%d')
        end_dt = datetime.strptime(end_date, '%Y-%m-%d')
        while current_date <= end_dt:
            date_str = current_date.strftime('%Y-%m-%d')
            print(f"\n[Processing Date] {date_str}")
            params = {
                'cid': cid,
                'userId': user_id,
                'cityProvCode': city_prov_code,
                'rgnCode': rgn_code,
                'dongCode': dong_code,
                'dateType': 'HH',
                'startGathDtm': date_str,
                'endGathDtm': date_str,
                'ensoTypeCode': enso_type
            }
            try:
                res = session.get(hourly_url, params=params, headers=api_headers, timeout=10)
                if res.status_code == 200:
                    body = res.json()
                    # Response shape: data.userByTimeDataResultDtoList.
                    # ``or {}`` also covers an explicit null ``data``.
                    payload = body.get('data') or {}
                    hourly_list = payload.get('userByTimeDataResultDtoList', [])
                    if isinstance(hourly_list, list) and len(hourly_list) > 0:
                        print(f" ✓ Found {len(hourly_list)} hourly records")
                        for item in hourly_list:
                            # gathDtm looks like "00시", "01시", ..., "23시".
                            # Keep only the digits so the suffix never ends
                            # up inside the timestamp.
                            raw_label = str(item.get('gathDtm') or '')
                            digits = ''.join(ch for ch in raw_label if ch.isdigit())
                            if not 1 <= len(digits) <= 2:
                                # Unknown format: skip rather than store a
                                # record under a wrong hour.
                                print(f" ⚠ Skipping record with unparseable hour: {raw_label!r}")
                                continue
                            hour = digits.zfill(2)
                            generation_kwh = safe_float(item.get('dayEnergy', 0))
                            timestamp = f"{date_str} {hour}:00:00"
                            results.append({
                                'plant_id': plant_id,
                                'timestamp': timestamp,
                                'generation_kwh': generation_kwh,
                                'current_kw': 0
                            })
                    else:
                        print(f" ⚠ No hourly data for {date_str}")
                else:
                    print(f" ✗ HTTP {res.status_code}")
            except Exception as e:
                # One bad day must not stop the remaining days.
                print(f" ✗ Error: {e}")
            # Advance to the next day.
            current_date += timedelta(days=1)
    except Exception as e:
        print(f" ✗ Overall error: {e}")

    print(f"\n{'='*60}")
    print(f"[Total] Collected {len(results)} hourly records")
    print(f"{'='*60}\n")
    return results
def fetch_history_daily(plant_info, start_date, end_date):
    """Collect daily historical generation data for a KREMC plant.

    Logs in once, then requests the ``meainDataList`` statistics endpoint
    with ``dateType='DD'``. The requested range is split into chunks that
    never cross a calendar-month boundary.

    Args:
        plant_info: dict with plant configuration (``id``, ``name``,
            ``auth``, ``system``, ``options``).
        start_date: str, first day to fetch (YYYY-MM-DD).
        end_date: str, last day to fetch (YYYY-MM-DD).

    Returns:
        list of dicts with the keys ``plant_id``, ``date``,
        ``generation_kwh`` and ``current_kw`` (always 0). Empty when login
        fails; partial when an error interrupts the loop.
    """
    import calendar
    from datetime import datetime, timedelta

    results = []
    plant_id = plant_info.get('id', 'kremc-05')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    options = plant_info.get('options', {})
    plant_name = plant_info.get('name', '5호기')
    user_id = auth.get('user_id', '')
    password = auth.get('password', '')
    login_url = system.get('login_url', '')
    api_base = system.get('api_base', '')
    enso_type = system.get('enso_type', '15001')

    # Extra query parameters required by the KREMC statistics endpoint.
    cid = options.get('cid', '10013000376')
    city_prov_code = options.get('cityProvCode', '11')
    rgn_code = options.get('rgnCode', '11410')
    dong_code = options.get('dongCode', '1141011700')

    session = create_session()
    print(f"\n{'='*60}")
    print(f"[KREMC Daily] {plant_name} ({start_date} ~ {end_date}) - Looping by Month")
    print(f"{'='*60}")

    # Login request headers.
    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }
    try:
        login_data = {'userId': user_id, 'password': password}
        login_res = session.post(login_url, json=login_data, headers=headers, timeout=10)
        if login_res.status_code != 200:
            print(" ✗ Login failed")
            return results
        login_json = login_res.json()

        # The token is either the ``data`` string itself or a key inside a
        # ``data`` object; the same key fallbacks as the other fetchers in
        # this module are accepted.
        data = login_json.get('data')
        if isinstance(data, str):
            token = data
        elif isinstance(data, dict):
            token = data.get('token') or data.get('accessToken') or data.get('jwt')
        else:
            token = None
        if not token:
            print(" ✗ Token not found")
            return results
        print(" ✓ Login successful")

        api_headers = {
            'User-Agent': 'Mozilla/5.0',
            'Accept': 'application/json',
            'X-Auth-Token': token
        }
        daily_url = f"{api_base}/stat/userbyuser/meainDataList"

        # Walk the range in chunks: each chunk starts where the previous one
        # ended and stops at the end of that month or at end_date, whichever
        # comes first.
        loop_start = datetime.strptime(start_date, '%Y-%m-%d')
        end_date_dt = datetime.strptime(end_date, '%Y-%m-%d')
        while loop_start <= end_date_dt:
            last_day_of_month = calendar.monthrange(loop_start.year, loop_start.month)[1]
            loop_end = min(loop_start.replace(day=last_day_of_month), end_date_dt)
            s_str = loop_start.strftime('%Y-%m-%d')
            e_str = loop_end.strftime('%Y-%m-%d')
            print(f" [Fetching] {s_str} ~ {e_str} ...", end="", flush=True)
            try:
                params = {
                    'cid': cid,
                    'userId': user_id,
                    'cityProvCode': city_prov_code,
                    'rgnCode': rgn_code,
                    'dongCode': dong_code,
                    'dateType': 'DD',
                    'startGathDtm': s_str,
                    'endGathDtm': e_str,
                    'ensoTypeCode': enso_type
                }
                res = session.get(daily_url, params=params, headers=api_headers, timeout=15)
                if res.status_code == 200:
                    body = res.json()
                    # ``or {}`` also covers an explicit null ``data``.
                    payload = body.get('data') or {}
                    daily_list = payload.get('userByTimeDataResultDtoList', [])
                    if isinstance(daily_list, list) and daily_list:
                        count = 0
                        for item in daily_list:
                            # gathDtm is expected as "2026-01-01"; trim
                            # whitespace and anything past the date part.
                            date_str = str(item.get('gathDtm') or '').strip()[:10]
                            if not date_str:
                                # A record without a date cannot be stored
                                # meaningfully; skip it instead of failing
                                # the rest of the chunk.
                                continue
                            results.append({
                                'plant_id': plant_id,
                                'date': date_str,
                                'generation_kwh': safe_float(item.get('dayEnergy', 0)),
                                'current_kw': 0
                            })
                            count += 1
                        print(f" OK ({count} days)")
                    else:
                        print(" No data")
                else:
                    print(f" HTTP {res.status_code}")
            except Exception as e:
                # One bad chunk must not stop the remaining chunks.
                print(f" Error: {e}")
            # Next chunk starts the day after this one ended.
            loop_start = loop_end + timedelta(days=1)
    except Exception as e:
        print(f" ✗ Overall Error: {e}")

    print(f"\n[Total] Collected {len(results)} daily records\n")
    return results
def fetch_history_monthly(plant_info, start_month, end_month):
    """Collect monthly historical generation totals for a KREMC plant.

    KREMC does not support ``dateType=MM`` (the server answers with a 500
    error), so each month is fetched as daily data (``dateType=DD``) and the
    daily values are summed.

    Args:
        plant_info: dict with plant configuration (``id``, ``name``,
            ``start_date``, ``auth``, ``system``, ``options``).
        start_month: str, first month to fetch (YYYY-MM). Months before the
            plant's ``start_date`` are skipped.
        end_month: str, last month to fetch (YYYY-MM).

    Returns:
        list of dicts with the keys ``plant_id``, ``month`` and
        ``generation_kwh``; months without data are omitted. Empty when the
        login is rejected.

    Note:
        The login request is not wrapped in ``try``; exceptions raised by it
        (network failure, undecodable response) propagate to the caller.
    """
    from datetime import datetime
    from dateutil.relativedelta import relativedelta

    results = []
    plant_id = plant_info.get('id', 'kremc-05')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    options = plant_info.get('options', {})
    plant_name = plant_info.get('name', '5호기')

    # Never request months before the plant went into operation. Plain
    # string comparison is sufficient for zero-padded YYYY-MM values.
    plant_start_date = plant_info.get('start_date', '2018-06-28')
    plant_start_month = plant_start_date[:7]  # YYYY-MM
    if start_month < plant_start_month:
        actual_start = plant_start_month
        print(f" 발전소 가동일({plant_start_date}) 이후부터 수집: {actual_start}")
    else:
        actual_start = start_month

    user_id = auth.get('user_id', '')
    password = auth.get('password', '')
    login_url = system.get('login_url', '')
    api_base = system.get('api_base', '')
    enso_type = system.get('enso_type', '15001')

    # Extra query parameters required by the KREMC statistics endpoint.
    cid = options.get('cid', '10013000376')
    city_prov_code = options.get('cityProvCode', '11')
    rgn_code = options.get('rgnCode', '11410')
    dong_code = options.get('dongCode', '1141011700')

    session = create_session()
    print(f"\n{'='*60}")
    print(f"[KREMC Monthly] {plant_name} ({actual_start} ~ {end_month})")
    print(f"{'='*60}")

    # Log in.
    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }
    login_data = {'userId': user_id, 'password': password}
    login_res = session.post(login_url, json=login_data, headers=headers, timeout=10)
    if login_res.status_code != 200:
        print(" ✗ Login failed")
        return results
    login_json = login_res.json()

    # The token is either the ``data`` string itself or a key inside a
    # ``data`` object; the same key fallbacks as the other fetchers in this
    # module are accepted.
    data = login_json.get('data')
    if isinstance(data, str):
        token = data
    elif isinstance(data, dict):
        token = data.get('token') or data.get('accessToken') or data.get('jwt')
    else:
        token = None
    if not token:
        print(" ✗ Token not found")
        return results
    print(" ✓ Login successful")

    api_headers = {
        'User-Agent': 'Mozilla/5.0',
        'Accept': 'application/json',
        'X-Auth-Token': token
    }
    daily_url = f"{api_base}/stat/userbyuser/meainDataList"

    current_month = datetime.strptime(actual_start, '%Y-%m')
    end_month_dt = datetime.strptime(end_month, '%Y-%m')
    one_month = relativedelta(months=1)
    while current_month <= end_month_dt:
        month_str = current_month.strftime('%Y-%m')
        # First and last day of the month. Stepping to the first day of the
        # next month and back one day also handles December.
        first_day = current_month.strftime('%Y-%m-01')
        next_month = current_month + one_month
        last_day = (next_month - relativedelta(days=1)).strftime('%Y-%m-%d')
        try:
            # Fetch daily values (dateType=DD) and add them up.
            params = {
                'cid': cid,
                'userId': user_id,
                'cityProvCode': city_prov_code,
                'rgnCode': rgn_code,
                'dongCode': dong_code,
                'dateType': 'DD',
                'startGathDtm': first_day,
                'endGathDtm': last_day,
                'ensoTypeCode': enso_type
            }
            res = session.get(daily_url, params=params, headers=api_headers, timeout=10)
            if res.status_code == 200:
                body = res.json()
                # Response shape: data.userByTimeDataResultDtoList.
                # ``or {}`` also covers an explicit null ``data``.
                payload = body.get('data') or {}
                daily_list = payload.get('userByTimeDataResultDtoList', [])
                if isinstance(daily_list, list) and len(daily_list) > 0:
                    monthly_total = sum(safe_float(item.get('dayEnergy', 0)) for item in daily_list)
                    results.append({
                        'plant_id': plant_id,
                        'month': month_str,
                        'generation_kwh': monthly_total
                    })
                    print(f"{month_str}: {monthly_total:.1f}kWh (from {len(daily_list)} days)")
                else:
                    # Report the gap instead of dropping the month silently.
                    print(f" ⚠ No data for {month_str}")
            else:
                print(f" ✗ HTTP {res.status_code} for {month_str}")
        except Exception as e:
            # One bad month must not stop the remaining months.
            print(f" ✗ Error for {month_str}: {e}")
        # Advance to the next month.
        current_month = next_month

    print(f"[Total] Collected {len(results)} monthly records\n")
    return results