fix: Improve Excel parsing and error messages for monthly upload

2026-01-27 18:10:46 +09:00 · 2026-01-27 18:10:46 +09:00 · 4a5762b938
commit 4a5762b938
parent d56a8f7ae1
1 changed files with 56 additions and 24 deletions
--- a/app/routers/upload.py
+++ b/app/routers/upload.py
@ -337,27 +337,45 @@ async def upload_plant_monthly_data(
    # 3. 엑셀 파싱 및 전처리
    try:
        contents = await file.read()
-        df = pd.read_excel(io.BytesIO(contents))
+        # engine='openpyxl' 명시 (xlsx)
+        try:
+            df = pd.read_excel(io.BytesIO(contents), engine='openpyxl')
+        except:
+            # xls fallback
+            df = pd.read_excel(io.BytesIO(contents))
        
-        # 컬럼명 정규화 (소문자, 공백제거)
+        # 컬럼명 정규화 (모두 소문자, 앞뒤 공백 제거)
        df.columns = [str(col).lower().strip() for col in df.columns]
        
-        # 필수 컬럼 체크
-        # 사용자가 제공한 엑셀은 header가 없을 수도 있고, 1행이 header일 수도 있음.
-        # 일단 year, month, kwh가 포함되어 있다고 가정하거나, 첫 3열을 사용해야 할 수도 있음.
-        # 이미지에서는 header가 명확함 (year, month, kwh).
+        # 필수 컬럼 검사 (별칭 지원)
+        # year: year, 년도, 연도
+        # month: month, 월
+        # kwh: kwh, generation, 발전량, 발전량(kwh)
+        
+        col_map = {}
+        for col in df.columns:
+            if col in ['year', '년도', '연도']:
+                col_map['year'] = col
+            elif col in ['month', '월']:
+                col_map['month'] = col
+            elif col in ['kwh', 'generation', '발전량', '발전량(kwh)']:
+                col_map['kwh'] = col
+                
+        # 매핑된 컬럼으로 이름 변경
+        rename_dict = {}
+        if 'year' in col_map: rename_dict[col_map['year']] = 'year'
+        if 'month' in col_map: rename_dict[col_map['month']] = 'month'
+        if 'kwh' in col_map: rename_dict[col_map['kwh']] = 'kwh'
+        
+        df.rename(columns=rename_dict, inplace=True)
        
        required = {'year', 'month', 'kwh'}
        if not required.issubset(df.columns):
-            # 혹시 한글 헤더일 경우 매핑 시도
-            rename_map = {'년도': 'year', '월': 'month', '발전량': 'kwh', '발전량(kwh)': 'kwh'}
-            df.rename(columns=rename_map, inplace=True)
-            
-            if not required.issubset(df.columns):
-                raise HTTPException(
-                    status_code=400, 
-                    detail=f"필수 컬럼(year, month, kwh)이 누락되었습니다. 현재 컬럼: {list(df.columns)}"
-                )
+            found_cols = list(df.columns)
+            raise HTTPException(
+                status_code=400, 
+                detail=f"필수 컬럼이 누락되었습니다. (필요: year, month, kwh). 현재 인식된 컬럼: {found_cols}. 1행에 헤더가 있는지 확인해주세요."
+            )

        # A열(year) 병합된 셀 처리 (Forward Fill)
        df['year'] = df['year'].fillna(method='ffill')
@ -371,21 +389,28 @@ async def upload_plant_monthly_data(
            try:
                # 1. Year 파싱
                y_raw = str(row['year']).replace('년', '').strip()
+                # '2014.0' 같은 실수형 문자열 처리
                if not y_raw or y_raw.lower() == 'nan':
+                     continue
+                
+                try:
+                    year_val = int(float(y_raw))
+                except:
                    continue
-                year_val = int(float(y_raw)) # 2014.0 -> 2014
                
                # 2. Month 파싱
                m_raw = str(row['month']).strip()
-                if m_raw in ['합계', '평균', 'nan', 'None']:
+                if m_raw in ['합계', '평균', 'nan', 'None', '', 'nan']:
                    continue
                
-                # '1월' -> 1
+                # '1월' -> 1, '01' -> 1
                month_val_str = m_raw.replace('월', '').strip()
-                if not month_val_str.isdigit():
-                    continue # '합계' 등이 걸러지지 않은 경우 대비
                
-                month_val = int(month_val_str)
+                # 숫자가 아닌 경우(합계 등) skip
+                if not month_val_str.replace('.', '').isdigit():
+                    continue
+                    
+                month_val = int(float(month_val_str))
                if not (1 <= month_val <= 12):
                    continue
                
@ -394,7 +419,10 @@ async def upload_plant_monthly_data(
                if not k_raw or k_raw.lower() == 'nan':
                    kwh_val = 0.0
                else:
-                    kwh_val = float(k_raw)
+                    try:
+                        kwh_val = float(k_raw)
+                    except:
+                        kwh_val = 0.0
                
                # 포맷: YYYY-MM
                month_key = f"{year_val}-{month_val:02d}"
@ -407,10 +435,14 @@ async def upload_plant_monthly_data(
                })
                
            except Exception as e:
-                errors.append(f"Row {idx}: {e}")
+                errors.append(f"Row {idx+2}: {e}")
                
        if not records:
-             raise HTTPException(status_code=400, detail="유효한 데이터가 없습니다.")
+             raise HTTPException(
+                 status_code=400, 
+                 detail=f"유효한 데이터가 없습니다. 파싱 에러 예시: {errors[:3] if errors else '없음'}"
+             )
+

        # 4. DB Upsert
        # monthly_stats 테이블 생성 여부 확인이 필요하지만, 이미 되어있다고 가정