513 lines
18 KiB
Python
513 lines
18 KiB
Python
# ==========================================
# crawlers/cmsolar.py - CMSolar crawler (unit 10)
# Live data: JSON endpoint; history data: HTML table parsing
# ==========================================
|
||
|
||
import requests
|
||
import re
|
||
from .base import create_session, safe_float
|
||
|
||
def fetch_data(plant_info):
    """Collect the current readings of a CMSolar plant.

    Logs in, selects the site through ``change.php`` and then reads the JSON
    endpoint ``/plant/sub/idx_ok.php?mode=getPlant``.

    Args:
        plant_info: Plant configuration dict. Keys read here: ``id``,
            ``name``, ``company_name``, ``auth`` (``login_id``, ``login_pw``,
            ``site_no``) and ``system`` (``login_url``, ``base_url``;
            ``api_url`` is accepted as a fallback for ``base_url``).

    Returns:
        A one-element list holding a dict with the keys ``id``, ``name``,
        ``kw``, ``today`` and ``status``, or an empty list when the login,
        an HTTP request, or the parsing fails.
    """
    # Seconds; the history functions in this module use the same limit for
    # their report requests. Without it a stalled server blocks forever.
    request_timeout = 10

    plant_id = plant_info.get('id', 'cmsolar-10')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    company_name = plant_info.get('company_name', '함안햇빛발전소')
    plant_name = plant_info.get('name', '10호기')

    site_no = auth.get('site_no', '')
    login_url = system.get('login_url', '')
    # The history functions read the host from 'api_url'; accept either key
    # so one configuration entry serves every function.
    # NOTE(review): assumes 'api_url' and 'base_url' name the same host.
    base_url = (
        system.get('base_url')
        or system.get('api_url')
        or 'http://www.cmsolar2.kr'
    )

    session = create_session()

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded'
    }

    login_data = {
        'login_id': auth.get('login_id', ''),
        'login_pw': auth.get('login_pw', ''),
        'site_no': site_no
    }

    try:
        res = session.post(
            login_url, data=login_data, headers=headers,
            timeout=request_timeout,
        )
        if res.status_code != 200:
            print(f"❌ {plant_name} 로그인 실패: HTTP {res.status_code}")
            return []

        # Site selection (required before idx_ok.php can be queried).
        session.get(
            f"{base_url}/change.php?site={site_no}",
            headers=headers, timeout=request_timeout,
        )
    except Exception as e:
        print(f"❌ {plant_name} 접속 에러: {e}")
        return []

    # Data request (JSON endpoint).
    target_url = f"{base_url}/plant/sub/idx_ok.php?mode=getPlant"

    try:
        res = session.get(target_url, headers=headers, timeout=request_timeout)
        if res.status_code != 200:
            print(f"❌ {plant_name} 데이터 요청 실패: HTTP {res.status_code}")
            return []

        # Only force UTF-8 when the response declared no encoding.
        if res.encoding is None:
            res.encoding = 'utf-8'

        data = res.json()

        # Expected payload shape: [{"plant": {...}}]
        if not (isinstance(data, list) and data):
            print(f"❌ {plant_name} 데이터 형식 오류: {data}")
            return []

        plant_data = data[0].get('plant', {})

        # Both readings are divided by 1000 (W -> kW per the original note).
        curr_kw = safe_float(plant_data.get('now', 0)) / 1000.0
        today_kwh = safe_float(plant_data.get('today', 0)) / 1000.0

        # A missing, None or empty flag counts as "no error" instead of
        # raising and discarding an otherwise valid reading.
        is_error = int(plant_data.get('inv_error') or 0)
        status = "🟢 정상" if is_error == 0 else "🔴 점검/고장"

        return [{
            'id': plant_id,
            'name': f'{company_name} {plant_name}',
            'kw': curr_kw,
            'today': today_kwh,
            'status': status
        }]
    except Exception as e:
        print(f"❌ {plant_name} 에러: {e}")
        return []
|
||
|
||
|
||
def fetch_history_hourly(plant_info, start_date, end_date):
    """Collect hourly historical generation data for a CMSolar plant.

    Endpoint: ``/plant/sub/report_ok.php`` (responds with an HTML table).
    Query: ``mode=getPowers&type=daily&device=total&start=YYYY-MM-DD&money=``;
    one request is issued per day in the inclusive range.

    Args:
        plant_info: Plant configuration dict. Keys read here: ``id``,
            ``name``, ``auth`` (``login_id``, ``login_pw``, ``site_no``) and
            ``system`` (``login_url``, ``api_url``).
        start_date: First day to collect, formatted ``YYYY-MM-DD``.
        end_date: Last day to collect (inclusive), formatted ``YYYY-MM-DD``.

    Returns:
        A list of dicts with the keys ``plant_id``, ``timestamp``
        (``YYYY-MM-DD HH:00:00``), ``generation_kwh`` and ``current_kw``
        (always 0). Empty when login or site selection fails. Days whose
        request or parsing fails are reported on stdout and skipped.

    Raises:
        ValueError: If ``start_date`` or ``end_date`` does not match
            ``%Y-%m-%d`` (raised after login and site selection succeeded).
    """
    from datetime import datetime, timedelta

    # Seconds; now applied to login and site selection as well, so a stalled
    # server cannot block the crawler forever.
    request_timeout = 10

    results = []
    plant_id = plant_info.get('id', 'cmsolar-10')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '10호기')

    site_no = auth.get('site_no', '')
    login_url = system.get('login_url', '')

    # Report endpoint
    base_url = system.get('api_url', 'http://www.cmsolar2.kr')
    data_url = f"{base_url}/plant/sub/report_ok.php"

    session = create_session()

    print(f"\n{'='*60}")
    print(f"[CMSolar History] {plant_name} ({start_date} ~ {end_date})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }

    login_data = {
        'login_id': auth.get('login_id', ''),
        'login_pw': auth.get('login_pw', ''),
        'site_no': site_no
    }

    try:
        res = session.post(
            login_url, data=login_data, headers=headers,
            timeout=request_timeout,
        )
    except Exception as e:
        print(f"  ✗ Login error: {e}")
        return results
    if res.status_code != 200:
        print("  ✗ Login failed")
        return results
    print("  ✓ Login successful")

    # Site selection (mandatory before the report endpoint is queried).
    try:
        session.get(
            f"{base_url}/change.php?site={site_no}",
            headers=headers, timeout=request_timeout,
        )
        print("  ✓ Site selected")
    except Exception as e:
        print(f"  ✗ Site selection error: {e}")
        return results

    first_day = datetime.strptime(start_date, '%Y-%m-%d')
    last_day = datetime.strptime(end_date, '%Y-%m-%d')

    # Compiled once, outside the per-day loop. <tbody> may carry attributes.
    tbody_re = re.compile(r'<tbody[^>]*>(.*?)</tbody>', re.DOTALL)
    # Row sample: <tr class="odd"><td>9</td><td>3.0</td>...
    # Commas are accepted so thousands-separated values ("1,234.5") are not
    # silently dropped; the daily/monthly parsers use the same pattern.
    row_re = re.compile(r'<tr[^>]*>\s*<td>(\d+)</td>\s*<td>([\d.,]+)</td>')

    # An inverted range yields an empty range() and therefore no requests.
    for offset in range((last_day - first_day).days + 1):
        date_str = (first_day + timedelta(days=offset)).strftime('%Y-%m-%d')

        # type=daily returns one day's worth of hourly rows.
        params = {
            'mode': 'getPowers',
            'type': 'daily',
            'device': 'total',
            'start': date_str,
            'money': ''
        }

        try:
            res = session.get(
                data_url, params=params, headers=headers,
                timeout=request_timeout,
            )
            res.encoding = 'utf-8'

            if res.status_code != 200:
                print(f"  ✗ HTTP {res.status_code}")
                continue

            tbody_match = tbody_re.search(res.text)
            if not tbody_match:
                print(f"  ⚠ No tbody found for {date_str}")
                continue

            rows = row_re.findall(tbody_match.group(1))
            if not rows:
                print(f"  ⚠ No data for {date_str}")
                continue

            print(f"  ✓ Found {len(rows)} hourly records for {date_str}")

            for hour, kwh in rows:
                results.append({
                    'plant_id': plant_id,
                    'timestamp': f"{date_str} {hour.zfill(2)}:00:00",
                    'generation_kwh': safe_float(kwh.replace(',', '')),
                    'current_kw': 0
                })
        except Exception as e:
            print(f"  ✗ Error for {date_str}: {e}")

    print(f"\n{'='*60}")
    print(f"[Total] Collected {len(results)} hourly records")
    print(f"{'='*60}\n")

    return results
|
||
|
||
|
||
def fetch_history_daily(plant_info, start_date, end_date):
    """Collect daily historical generation data for a CMSolar plant.

    Endpoint: ``/plant/sub/report_ok.php`` (responds with an HTML table).
    Query: ``mode=getPowers&type=month&device=total&start=YYYY-MM-01&money=``;
    one request is issued per calendar month touched by the range and the
    rows are filtered down to ``start_date``..``end_date`` (inclusive).

    Args:
        plant_info: Plant configuration dict. Keys read here: ``id``,
            ``name``, ``auth`` (``login_id``, ``login_pw``, ``site_no``) and
            ``system`` (``login_url``, ``api_url``).
        start_date: First day to collect, formatted ``YYYY-MM-DD``.
        end_date: Last day to collect (inclusive), formatted ``YYYY-MM-DD``.

    Returns:
        A list of dicts with the keys ``plant_id``, ``date``
        (``YYYY-MM-DD``), ``generation_kwh`` and ``current_kw`` (always 0).
        Empty when login or site selection fails. Months whose request or
        parsing fails are reported on stdout and skipped.

    Raises:
        ValueError: If ``start_date`` or ``end_date`` does not match
            ``%Y-%m-%d`` (raised after login and site selection succeeded).
    """
    from datetime import datetime
    from dateutil.relativedelta import relativedelta

    # Seconds; now applied to login and site selection as well, so a stalled
    # server cannot block the crawler forever.
    request_timeout = 10

    results = []
    plant_id = plant_info.get('id', 'cmsolar-10')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '10호기')

    site_no = auth.get('site_no', '')
    login_url = system.get('login_url', '')

    # Report endpoint
    base_url = system.get('api_url', 'http://www.cmsolar2.kr')
    data_url = f"{base_url}/plant/sub/report_ok.php"

    session = create_session()

    print(f"\n{'='*60}")
    print(f"[CMSolar Daily] {plant_name} ({start_date} ~ {end_date})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }

    login_data = {
        'login_id': auth.get('login_id', ''),
        'login_pw': auth.get('login_pw', ''),
        'site_no': site_no
    }

    try:
        res = session.post(
            login_url, data=login_data, headers=headers,
            timeout=request_timeout,
        )
    except Exception as e:
        print(f"  ✗ Login error: {e}")
        return results
    if res.status_code != 200:
        print("  ✗ Login failed")
        return results
    print("  ✓ Login successful")

    # Site selection (mandatory before the report endpoint is queried).
    try:
        session.get(
            f"{base_url}/change.php?site={site_no}",
            headers=headers, timeout=request_timeout,
        )
        print("  ✓ Site selected")
    except Exception as e:
        print(f"  ✗ Site selection error: {e}")
        return results

    range_start = datetime.strptime(start_date, '%Y-%m-%d')
    range_end = datetime.strptime(end_date, '%Y-%m-%d')

    # Every (year, month) touched by the range; empty when the range is
    # inverted, so no request is sent in that case.
    months = []
    cursor = range_start
    while cursor <= range_end:
        months.append((cursor.year, cursor.month))
        cursor = cursor.replace(day=1) + relativedelta(months=1)

    # Compiled once, outside the per-month loop. <tbody> may carry attributes.
    tbody_re = re.compile(r'<tbody[^>]*>(.*?)</tbody>', re.DOTALL)
    # Row sample: <tr class="odd"><td>1</td><td>136.00</td>...
    row_re = re.compile(r'<tr[^>]*>\s*<td>(\d+)</td>\s*<td>([\d.,]+)</td>')

    for year, month in months:
        month_start = f"{year:04d}-{month:02d}-01"
        month_label = month_start[:7]

        # type=month returns one month's worth of daily rows.
        params = {
            'mode': 'getPowers',
            'type': 'month',
            'device': 'total',
            'start': month_start,
            'money': ''
        }

        try:
            res = session.get(
                data_url, params=params, headers=headers,
                timeout=request_timeout,
            )
            res.encoding = 'utf-8'

            if res.status_code != 200:
                print(f"  ✗ HTTP {res.status_code} for {month_label}")
                continue

            tbody_match = tbody_re.search(res.text)
            if not tbody_match:
                print(f"  ⚠ No tbody found for {month_label}")
                continue

            rows = row_re.findall(tbody_match.group(1))
            if not rows:
                print(f"  ⚠ No data for {month_label}")
                continue

            print(f"  ✓ Found {len(rows)} daily records for {month_label}")

            for day, kwh in rows:
                # Skip rows whose day number is not a real calendar day of
                # this month (e.g. a "31" row in a 30-day month).
                try:
                    record_day = datetime(year, month, int(day))
                except ValueError:
                    continue

                # Compare real dates instead of strings so non-zero-padded
                # arguments such as "2024-1-5" filter correctly.
                if not (range_start <= record_day <= range_end):
                    continue

                # Strip thousands separators before converting.
                generation_kwh = safe_float(kwh.replace(',', ''))
                date_str = f"{year:04d}-{month:02d}-{int(day):02d}"

                results.append({
                    'plant_id': plant_id,
                    'date': date_str,
                    'generation_kwh': generation_kwh,
                    'current_kw': 0
                })
                print(f"    ✓ {date_str}: {generation_kwh:.2f}kWh")
        except Exception as e:
            print(f"  ✗ Error for {month_label}: {e}")

    print(f"[Total] Collected {len(results)} daily records\n")
    return results
|
||
|
||
|
||
def fetch_history_monthly(plant_info, start_month, end_month):
    """Collect monthly historical generation data for a CMSolar plant.

    Endpoint: ``/plant/sub/report_ok.php`` (responds with an HTML table).
    Query: ``mode=getPowers&type=year&device=total&start=YYYY-01-01&money=``;
    one request is issued per calendar year touched by the range and the
    rows are filtered down to the requested months (inclusive).

    The effective start is clamped to the plant's ``start_date`` month
    (default ``2020-08-31``). That clamp is a plain string comparison, so it
    assumes zero-padded ``YYYY-MM`` values.

    Args:
        plant_info: Plant configuration dict. Keys read here: ``id``,
            ``name``, ``start_date``, ``auth`` (``login_id``, ``login_pw``,
            ``site_no``) and ``system`` (``login_url``, ``api_url``).
        start_month: First month to collect, formatted ``YYYY-MM``.
        end_month: Last month to collect (inclusive), formatted ``YYYY-MM``.

    Returns:
        A list of dicts with the keys ``plant_id``, ``month`` (``YYYY-MM``)
        and ``generation_kwh``. Empty when login or site selection fails.
        Years whose request or parsing fails are reported on stdout and
        skipped.

    Raises:
        ValueError: If the effective start month or ``end_month`` does not
            match ``%Y-%m`` (raised after login and site selection succeeded).
    """
    from datetime import datetime

    # Seconds; now applied to login and site selection as well, so a stalled
    # server cannot block the crawler forever.
    request_timeout = 10

    results = []
    plant_id = plant_info.get('id', 'cmsolar-10')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    plant_name = plant_info.get('name', '10호기')

    # Commissioning date; `or` also covers an explicit None/empty value and
    # str() tolerates a date object in the configuration.
    plant_start_date = str(plant_info.get('start_date') or '2020-08-31')
    plant_start_month = plant_start_date[:7]  # YYYY-MM

    # Never collect from before the plant went into operation.
    if start_month < plant_start_month:
        actual_start = plant_start_month
        print(f"  ℹ 발전소 가동일({plant_start_date}) 이후부터 수집: {actual_start}")
    else:
        actual_start = start_month

    site_no = auth.get('site_no', '')
    login_url = system.get('login_url', '')

    # Report endpoint
    base_url = system.get('api_url', 'http://www.cmsolar2.kr')
    data_url = f"{base_url}/plant/sub/report_ok.php"

    session = create_session()

    print(f"\n{'='*60}")
    print(f"[CMSolar Monthly] {plant_name} ({actual_start} ~ {end_month})")
    print(f"{'='*60}")

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
    }

    login_data = {
        'login_id': auth.get('login_id', ''),
        'login_pw': auth.get('login_pw', ''),
        'site_no': site_no
    }

    try:
        res = session.post(
            login_url, data=login_data, headers=headers,
            timeout=request_timeout,
        )
    except Exception as e:
        print(f"  ✗ Login error: {e}")
        return results
    if res.status_code != 200:
        print("  ✗ Login failed")
        return results
    print("  ✓ Login successful")

    # Site selection (mandatory before the report endpoint is queried).
    try:
        session.get(
            f"{base_url}/change.php?site={site_no}",
            headers=headers, timeout=request_timeout,
        )
        print("  ✓ Site selected")
    except Exception as e:
        print(f"  ✗ Site selection error: {e}")
        return results

    first_month = datetime.strptime(actual_start, '%Y-%m')
    last_month = datetime.strptime(end_month, '%Y-%m')
    first_key = (first_month.year, first_month.month)
    last_key = (last_month.year, last_month.month)

    # Each year touched by the range, visited exactly once; empty when the
    # range is inverted, so no request is sent in that case.
    years = []
    cursor = first_month
    while cursor <= last_month:
        years.append(cursor.year)
        cursor = cursor.replace(year=cursor.year + 1, month=1)

    # Compiled once, outside the per-year loop. <tbody> may carry attributes.
    tbody_re = re.compile(r'<tbody[^>]*>(.*?)</tbody>', re.DOTALL)
    # Row sample: <tr class="even"><td>1</td><td>2,836.00</td>...
    row_re = re.compile(r'<tr[^>]*>\s*<td>(\d+)</td>\s*<td>([\d.,]+)</td>')

    for year in years:
        # type=year returns one year's worth of monthly rows.
        params = {
            'mode': 'getPowers',
            'type': 'year',
            'device': 'total',
            'start': f"{year}-01-01",
            'money': ''
        }

        try:
            res = session.get(
                data_url, params=params, headers=headers,
                timeout=request_timeout,
            )
            res.encoding = 'utf-8'

            if res.status_code != 200:
                print(f"  ✗ HTTP {res.status_code} for year {year}")
                continue

            tbody_match = tbody_re.search(res.text)
            if not tbody_match:
                print(f"  ⚠ No tbody found for year {year}")
                continue

            rows = row_re.findall(tbody_match.group(1))
            if not rows:
                print(f"  ⚠ No data for year {year}")
                continue

            year_count = 0
            for month, kwh in rows:
                month_no = int(month)
                # Ignore rows whose first cell is not a calendar month.
                if not 1 <= month_no <= 12:
                    continue

                # Numeric comparison, so the filter does not depend on the
                # arguments being zero-padded.
                if not (first_key <= (year, month_no) <= last_key):
                    continue

                # Strip thousands separators before converting.
                generation_kwh = safe_float(kwh.replace(',', ''))
                month_str = f"{year:04d}-{month_no:02d}"

                results.append({
                    'plant_id': plant_id,
                    'month': month_str,
                    'generation_kwh': generation_kwh
                })
                print(f"  ✓ {month_str}: {generation_kwh:.1f}kWh")
                year_count += 1

            if year_count > 0:
                print(f"  → Collected {year_count} months from {year}")
        except Exception as e:
            print(f"  ✗ Error for year {year}: {e}")

    print(f"[Total] Collected {len(results)} monthly records\n")
    return results
|