# ==========================================
# crawlers/cmsolar.py - CMSolar crawler (plant No. 10)
# Live data comes from a JSON endpoint; history is parsed from HTML tables.
# ==========================================
import re
from datetime import datetime, timedelta

import requests  # noqa: F401  (kept: part of the module's original imports)

from .base import create_session, safe_float

_DEFAULT_BASE_URL = 'http://www.cmsolar2.kr'

# Seconds. Applied to every request so a stalled server cannot hang the crawler.
_REQUEST_TIMEOUT = 10

# NOTE(review): the HTML tags inside these three patterns were reconstructed
# (the copy of the file under review had them stripped). Confirm against a
# live report_ok.php response before relying on them.
_TBODY_RE = re.compile(r'<tbody>(.*?)</tbody>', re.DOTALL)
# Hourly rows: first cell is the hour, second cell a plain decimal.
_HOURLY_ROW_RE = re.compile(r'<tr[^>]*>\s*<td>(\d+)</td>\s*<td>([\d.]+)')
# Daily / monthly rows: the value cell may contain thousands separators.
_TOTAL_ROW_RE = re.compile(r'<tr[^>]*>\s*<td>(\d+)</td>\s*<td>([\d.,]+)')


def _login_payload(auth):
    """Build the login form fields from the plant's ``auth`` mapping."""
    return {
        'login_id': auth.get('login_id', ''),
        'login_pw': auth.get('login_pw', ''),
        'site_no': auth.get('site_no', ''),
    }


def _select_site(session, base_url, site_no, headers):
    """Issue the site-selection request that must precede any data request."""
    session.get(
        f"{base_url}/change.php?site={site_no}",
        headers=headers,
        timeout=_REQUEST_TIMEOUT,
    )


def _print_banner(title, plant_name, start, end):
    """Print the three-line header that opens every history run."""
    print(f"\n{'='*60}")
    print(f"[{title}] {plant_name} ({start} ~ {end})")
    print(f"{'='*60}")


def _open_report_session(plant_info, base_url):
    """Log in and select the site for a history run, printing progress.

    Returns ``(session, headers)`` on success, or ``None`` when the login or
    the site selection failed (the reason has already been printed).
    """
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    }
    session = create_session()

    # Broad catches are deliberate here: the crawler is best-effort and must
    # report the failure instead of raising into the scheduler.
    try:
        res = session.post(
            system.get('login_url', ''),
            data=_login_payload(auth),
            headers=headers,
            timeout=_REQUEST_TIMEOUT,
        )
    except Exception as e:
        print(f" ✗ Login error: {e}")
        return None
    if res.status_code != 200:
        print(" ✗ Login failed")
        return None
    print(" ✓ Login successful")

    try:
        _select_site(session, base_url, auth.get('site_no', ''), headers)
    except Exception as e:
        print(f" ✗ Site selection error: {e}")
        return None
    print(" ✓ Site selected")
    return session, headers


def _get_report(session, data_url, headers, report_type, start):
    """Request one report page (``type`` = daily / month / year) as UTF-8."""
    params = {
        'mode': 'getPowers',
        'type': report_type,
        'device': 'total',
        'start': start,
        'money': '',
    }
    res = session.get(
        data_url, params=params, headers=headers, timeout=_REQUEST_TIMEOUT
    )
    res.encoding = 'utf-8'
    return res


def _extract_rows(html, row_re):
    """Return the ``(index, value)`` string pairs found inside ``<tbody>``.

    Returns ``None`` when the response has no ``<tbody>`` at all, and an empty
    list when the table body is present but holds no matching row.
    """
    tbody = _TBODY_RE.search(html)
    if tbody is None:
        return None
    return row_re.findall(tbody.group(1))


def _iter_months(start_dt, end_dt):
    """Yield ``(year, month)`` for every calendar month touched by the range."""
    if start_dt > end_dt:
        return
    year, month = start_dt.year, start_dt.month
    while (year, month) <= (end_dt.year, end_dt.month):
        yield year, month
        year, month = (year + 1, 1) if month == 12 else (year, month + 1)


def fetch_data(plant_info):
    """Collect the current reading of a CMSolar plant.

    Logs in, selects the site, then reads the JSON endpoint
    ``/plant/sub/idx_ok.php?mode=getPlant``.

    Args:
        plant_info: Plant configuration mapping. Reads ``id``, ``name``,
            ``company_name``, ``auth`` (``login_id``, ``login_pw``,
            ``site_no``) and ``system`` (``login_url``, ``base_url``).

    Returns:
        A one-element list ``[{'id', 'name', 'kw', 'today', 'status'}]``, or an
        empty list on any login, HTTP, or payload-format failure.
    """
    plant_id = plant_info.get('id', 'cmsolar-10')
    auth = plant_info.get('auth', {})
    system = plant_info.get('system', {})
    company_name = plant_info.get('company_name', '함안햇빛발전소')
    plant_name = plant_info.get('name', '10호기')
    base_url = system.get('base_url', _DEFAULT_BASE_URL)

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Content-Type': 'application/x-www-form-urlencoded',
    }
    session = create_session()

    # Login, then site selection (required before idx_ok.php answers).
    try:
        res = session.post(
            system.get('login_url', ''),
            data=_login_payload(auth),
            headers=headers,
            timeout=_REQUEST_TIMEOUT,
        )
        if res.status_code != 200:
            return []
        _select_site(session, base_url, auth.get('site_no', ''), headers)
    except Exception as e:
        print(f"❌ {plant_name} 접속 에러: {e}")
        return []

    # Data request (JSON endpoint).
    target_url = f"{base_url}/plant/sub/idx_ok.php?mode=getPlant"
    try:
        res = session.get(target_url, headers=headers, timeout=_REQUEST_TIMEOUT)
        if res.status_code != 200:
            return []
        if res.encoding is None:
            res.encoding = 'utf-8'
        data = res.json()

        # Expected payload shape: [{"plant": {...}}]
        if not (isinstance(data, list) and data):
            print(f"❌ {plant_name} 데이터 형식 오류: {data}")
            return []
        plant_data = data[0].get('plant', {})

        # Unit conversion: the endpoint values are divided by 1000 (W -> kW).
        curr_kw = safe_float(plant_data.get('now', 0)) / 1000.0
        today_kwh = safe_float(plant_data.get('today', 0)) / 1000.0

        # A missing, null, or empty flag is treated as "no error" so it cannot
        # discard an otherwise valid reading.
        is_error = int(plant_data.get('inv_error') or 0)
        # 0 kW is normal at night, so status follows the error flag only.
        status = "🟢 정상" if is_error == 0 else "🔴 점검/고장"

        return [{
            'id': plant_id,
            'name': f'{company_name} {plant_name}',
            'kw': curr_kw,
            'today': today_kwh,
            'status': status,
        }]
    except Exception as e:
        print(f"❌ {plant_name} 에러: {e}")
        return []


def fetch_history_hourly(plant_info, start_date, end_date):
    """Collect hourly history for a CMSolar plant.

    Endpoint: ``/plant/sub/report_ok.php`` (HTML table response), queried once
    per day with ``mode=getPowers&type=daily&device=total&start=YYYY-MM-DD``.

    Args:
        plant_info: Plant configuration mapping (``system.api_url`` selects
            the host).
        start_date: First day, ``YYYY-MM-DD``.
        end_date: Last day (inclusive), ``YYYY-MM-DD``.

    Returns:
        List of ``{'plant_id', 'timestamp', 'generation_kwh', 'current_kw'}``;
        empty when login fails or nothing could be parsed.

    Raises:
        ValueError: If either date does not match ``%Y-%m-%d``.
    """
    results = []
    # Validate the range before any network traffic.
    start_dt = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')

    plant_id = plant_info.get('id', 'cmsolar-10')
    plant_name = plant_info.get('name', '10호기')
    base_url = plant_info.get('system', {}).get('api_url', _DEFAULT_BASE_URL)
    data_url = f"{base_url}/plant/sub/report_ok.php"

    _print_banner('CMSolar History', plant_name, start_date, end_date)
    opened = _open_report_session(plant_info, base_url)
    if opened is None:
        return results
    session, headers = opened

    # One request per day: type=daily returns that day's hourly rows.
    for offset in range((end_dt - start_dt).days + 1):
        date_str = (start_dt + timedelta(days=offset)).strftime('%Y-%m-%d')
        try:
            res = _get_report(session, data_url, headers, 'daily', date_str)
            if res.status_code != 200:
                print(f" ✗ HTTP {res.status_code}")
                continue
            rows = _extract_rows(res.text, _HOURLY_ROW_RE)
            if rows is None:
                print(f" ⚠ No tbody found for {date_str}")
                continue
            if not rows:
                print(f" ⚠ No data for {date_str}")
                continue
            print(f" ✓ Found {len(rows)} hourly records for {date_str}")
            for hour, kwh in rows:
                results.append({
                    'plant_id': plant_id,
                    'timestamp': f"{date_str} {hour.zfill(2)}:00:00",
                    'generation_kwh': safe_float(kwh),
                    'current_kw': 0,
                })
        except Exception as e:
            print(f" ✗ Error for {date_str}: {e}")

    print(f"\n{'='*60}")
    print(f"[Total] Collected {len(results)} hourly records")
    print(f"{'='*60}\n")
    return results


def fetch_history_daily(plant_info, start_date, end_date):
    """Collect daily history for a CMSolar plant.

    Endpoint: ``/plant/sub/report_ok.php`` (HTML table response), queried once
    per month with ``mode=getPowers&type=month&device=total&start=YYYY-MM-01``.
    Rows outside ``[start_date, end_date]`` are discarded.

    Args:
        plant_info: Plant configuration mapping (``system.api_url`` selects
            the host).
        start_date: First day, ``YYYY-MM-DD``.
        end_date: Last day (inclusive), ``YYYY-MM-DD``.

    Returns:
        List of ``{'plant_id', 'date', 'generation_kwh', 'current_kw'}``;
        empty when login fails or nothing could be parsed.

    Raises:
        ValueError: If either date does not match ``%Y-%m-%d``.
    """
    results = []
    start_dt = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')
    # Zero-padded bounds: the range filter compares strings, which is only
    # valid when both sides are padded (the caller may pass e.g. "2024-1-5").
    start_key = start_dt.strftime('%Y-%m-%d')
    end_key = end_dt.strftime('%Y-%m-%d')

    plant_id = plant_info.get('id', 'cmsolar-10')
    plant_name = plant_info.get('name', '10호기')
    base_url = plant_info.get('system', {}).get('api_url', _DEFAULT_BASE_URL)
    data_url = f"{base_url}/plant/sub/report_ok.php"

    _print_banner('CMSolar Daily', plant_name, start_date, end_date)
    opened = _open_report_session(plant_info, base_url)
    if opened is None:
        return results
    session, headers = opened

    # One request per month: type=month returns that month's daily rows.
    for year, month in _iter_months(start_dt, end_dt):
        month_label = f"{year:04d}-{month:02d}"
        try:
            res = _get_report(
                session, data_url, headers, 'month', f"{month_label}-01"
            )
            if res.status_code != 200:
                print(f" ✗ HTTP {res.status_code} for {month_label}")
                continue
            rows = _extract_rows(res.text, _TOTAL_ROW_RE)
            if rows is None:
                print(f" ⚠ No tbody found for {month_label}")
                continue
            if not rows:
                print(f" ⚠ No data for {month_label}")
                continue
            print(f" ✓ Found {len(rows)} daily records for {month_label}")
            for day, kwh in rows:
                generation_kwh = safe_float(kwh.replace(',', ''))
                date_str = f"{month_label}-{int(day):02d}"
                # Keep only the days inside the requested range.
                if start_key <= date_str <= end_key:
                    results.append({
                        'plant_id': plant_id,
                        'date': date_str,
                        'generation_kwh': generation_kwh,
                        'current_kw': 0,
                    })
                    print(f" ✓ {date_str}: {generation_kwh:.2f}kWh")
        except Exception as e:
            print(f" ✗ Error for {month_label}: {e}")

    print(f"[Total] Collected {len(results)} daily records\n")
    return results


def fetch_history_monthly(plant_info, start_month, end_month):
    """Collect monthly history for a CMSolar plant.

    Endpoint: ``/plant/sub/report_ok.php`` (HTML table response), queried once
    per year with ``mode=getPowers&type=year&device=total&start=YYYY-01-01``.
    The start is clamped to the plant's ``start_date`` month, and rows outside
    the resulting range are discarded.

    Args:
        plant_info: Plant configuration mapping. ``start_date`` (default
            ``2020-08-31``) is the earliest date worth querying;
            ``system.api_url`` selects the host.
        start_month: First month, ``YYYY-MM``.
        end_month: Last month (inclusive), ``YYYY-MM``.

    Returns:
        List of ``{'plant_id', 'month', 'generation_kwh'}``; empty when login
        fails or nothing could be parsed.

    Raises:
        ValueError: If a month string does not match ``%Y-%m``.
    """
    results = []
    plant_id = plant_info.get('id', 'cmsolar-10')
    plant_name = plant_info.get('name', '10호기')

    # Never query before the plant went live.
    plant_start_date = plant_info.get('start_date') or '2020-08-31'
    plant_start_month = plant_start_date[:7]  # YYYY-MM
    requested_start = datetime.strptime(start_month, '%Y-%m').strftime('%Y-%m')
    if requested_start < plant_start_month:
        actual_start = plant_start_month
        print(f" ℹ 발전소 가동일({plant_start_date}) 이후부터 수집: {actual_start}")
    else:
        actual_start = requested_start

    start_dt = datetime.strptime(actual_start, '%Y-%m')
    end_dt = datetime.strptime(end_month, '%Y-%m')
    # Zero-padded bounds for the string comparison in the row filter.
    start_key = start_dt.strftime('%Y-%m')
    end_key = end_dt.strftime('%Y-%m')

    base_url = plant_info.get('system', {}).get('api_url', _DEFAULT_BASE_URL)
    data_url = f"{base_url}/plant/sub/report_ok.php"

    _print_banner('CMSolar Monthly', plant_name, actual_start, end_month)
    opened = _open_report_session(plant_info, base_url)
    if opened is None:
        return results
    session, headers = opened

    # One request per year: type=year returns that year's monthly rows.
    years = range(start_dt.year, end_dt.year + 1) if start_dt <= end_dt else ()
    for year in years:
        try:
            res = _get_report(session, data_url, headers, 'year', f"{year}-01-01")
            if res.status_code != 200:
                print(f" ✗ HTTP {res.status_code} for year {year}")
                continue
            rows = _extract_rows(res.text, _TOTAL_ROW_RE)
            if rows is None:
                print(f" ⚠ No tbody found for year {year}")
                continue
            if not rows:
                print(f" ⚠ No data for year {year}")
                continue
            year_count = 0
            for month, kwh in rows:
                generation_kwh = safe_float(kwh.replace(',', ''))
                month_str = f"{year:04d}-{int(month):02d}"
                # Keep only the months inside the requested range.
                if start_key <= month_str <= end_key:
                    results.append({
                        'plant_id': plant_id,
                        'month': month_str,
                        'generation_kwh': generation_kwh,
                    })
                    print(f" ✓ {month_str}: {generation_kwh:.1f}kWh")
                    year_count += 1
            if year_count > 0:
                print(f" → Collected {year_count} months from {year}")
        except Exception as e:
            print(f" ✗ Error for year {year}: {e}")

    print(f"[Total] Collected {len(results)} monthly records\n")
    return results