solorpower_crawler/tests/fill_all_today.py
2026-03-30 13:01:18 +09:00

119 lines
4.5 KiB
Python
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import sys
import os
import importlib
from datetime import datetime, timezone, timedelta
# Add parent directory to path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from dotenv import load_dotenv
load_dotenv(os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), '.env'))
from database import get_supabase_client, save_history
from config import get_all_plants
def cleanup_history_today(plant_id, today_str):
    """
    Delete one plant's 'History' rows for a single day to avoid duplicates.

    Rows are removed from the ``solar_logs`` table when they match
    ``plant_id``, have ``status == 'History'`` and a ``created_at`` inside the
    half-open range ``[today_str 00:00:00, next day 00:00:00)``.

    Args:
        plant_id: Value matched against the ``plant_id`` column.
        today_str: Target date formatted as ``YYYY-MM-DD``.

    Returns:
        None. If no client is available the function returns silently.
        Errors raised while building or executing the delete request are
        printed rather than propagated (best-effort cleanup).
    """
    client = get_supabase_client()
    if not client:
        return

    try:
        # Use a half-open range ending at the next midnight so rows stamped
        # within the final second of the day (e.g. 23:59:59.500) are covered.
        day_start = datetime.strptime(today_str, "%Y-%m-%d")
        next_day_start = day_start + timedelta(days=1)
        start_ts = day_start.strftime("%Y-%m-%dT00:00:00")
        end_ts = next_day_start.strftime("%Y-%m-%dT00:00:00")

        # NOTE(review): these bounds carry no UTC offset, so the database
        # resolves them in its own session time zone. The original author's
        # note says save_history (database.py) stores the data timestamp in
        # created_at for hourly history - confirm both sides agree on the zone.

        # Only rows with status='History' are targeted so that logs written
        # with any other status (e.g. real-time crawls) are left alone.
        res = (
            client.table('solar_logs')
            .delete()
            .eq('plant_id', plant_id)
            .eq('status', 'History')
            .gte('created_at', start_ts)
            .lt('created_at', end_ts)
            .execute()
        )
        if res.data:
            print(f" 🧹 Cleaned up {len(res.data)} old history records for {today_str}.")
    except Exception as e:
        print(f" ⚠️ Cleanup failed: {e}")
def fill_all_today():
    """
    Re-fetch today's hourly and daily history for every configured plant.

    "Today" is the current date in UTC+9. For each plant returned by
    ``get_all_plants()`` whose type is not ``'unknown'``, the matching
    ``crawlers.<type>`` module is imported dynamically and, when available:

    1. ``fetch_history_hourly(plant, today, today)`` is called; if it returns
       data, the plant's existing 'History' rows for today are removed and
       the new data is passed to ``save_history(data, 'hourly')``.
    2. ``fetch_history_daily(plant, today, today)`` is called; if it returns
       data it is passed to ``save_history(data, 'daily')``.

    Errors for one step or one plant are printed and do not stop the
    remaining work. Progress is reported on stdout; nothing is returned.
    """
    plants = get_all_plants()
    now_kst = datetime.now(timezone(timedelta(hours=9)))
    today_str = now_kst.strftime("%Y-%m-%d")

    print(f"🚀 Starting Manual Data Fetch for TODAY: {today_str}")
    print("=" * 60)

    for plant in plants:
        plant_id = plant['id']
        plant_name = plant['name']
        plant_type = plant['type']

        # Skip unknown or unsupported types
        if plant_type == 'unknown':
            continue

        print(f"\nProcessing [{plant_type.upper()}] {plant_name} ({plant_id})...")

        try:
            # The crawler module is selected by the plant's type at runtime.
            module = importlib.import_module(f"crawlers.{plant_type}")

            # 1. Hourly data
            if hasattr(module, 'fetch_history_hourly'):
                print(" ⏳ Fetching Hourly Data...")
                try:
                    # fetch_history_hourly(config, start_date, end_date)
                    data = module.fetch_history_hourly(plant, today_str, today_str)
                    if data:
                        # Remove the old 'History' rows only once replacement
                        # data is in hand, so a failed or empty fetch never
                        # wipes the rows that are already stored.
                        cleanup_history_today(plant_id, today_str)
                        save_history(data, 'hourly')
                    else:
                        print(" ⚠️ No Hourly data found.")
                except Exception as e:
                    print(f" ❌ Hourly Fetch Error: {e}")
            else:
                print(" No fetch_history_hourly method.")

            # 2. Daily data (optional, the source may not list today yet)
            if hasattr(module, 'fetch_history_daily'):
                print(" ⏳ Fetching Daily Data...")
                try:
                    # fetch_history_daily(config, start_date, end_date)
                    data = module.fetch_history_daily(plant, today_str, today_str)
                    if data:
                        save_history(data, 'daily')
                    else:
                        print(" ⚠️ No Daily data found (Site might not list today yet).")
                except Exception as e:
                    print(f" ❌ Daily Fetch Error: {e}")
            else:
                print(" No fetch_history_daily method.")

        except ImportError:
            print(f" ❌ Module 'crawlers.{plant_type}' not found.")
        except Exception as e:
            print(f" ❌ Error processing plant: {e}")

    print("\n" + "=" * 60)
    print("All tasks completed.")
# Script entry point: run the backfill only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    fill_all_today()