rapids/src/features/battery_metrics.py

60 lines
4.4 KiB
Python

import pandas as pd
import datetime
battery_data = pd.read_csv(snakemake.input[0])
if battery_data.empty:
battery_features = pd.DataFrame(columns=["battery_diff", "time_diff", "battery_decrease_times","battery_consumption_rate", "local_date"])
else:
for col in ["local_start_date_time", "local_end_date_time", "local_start_date", "local_end_date"]:
battery_data[col] = pd.to_datetime(battery_data[col])
# split the row into 2 rows when local_start_date + 1 = local_end_date
battery_data_overnight = battery_data[battery_data["local_start_date"] + datetime.timedelta(days=1) == battery_data["local_end_date"]]
if not battery_data_overnight.empty:
battery_data_overnight_first, battery_data_overnight_second = pd.DataFrame(columns=battery_data.columns), pd.DataFrame(columns=battery_data.columns)
battery_data_overnight_first["total_battery_diff"], battery_data_overnight_second["total_battery_diff"] = battery_data_overnight["battery_diff"], battery_data_overnight["battery_diff"]
battery_data_overnight_first["total_time_diff"], battery_data_overnight_second["total_time_diff"] = battery_data_overnight["time_diff"], battery_data_overnight["time_diff"]
# let start = start OR end = end, then fill the left col with the start+23:59:59.
battery_data_overnight_first["local_start_date_time"] = battery_data_overnight["local_start_date_time"]
battery_data_overnight_first["local_end_date_time"] = battery_data_overnight["local_start_date"].apply(lambda x: datetime.datetime.combine(x, datetime.time(23,59,59)))
battery_data_overnight_first["local_start_date"] = battery_data_overnight["local_start_date"]
battery_data_overnight_second["local_end_date_time"] = battery_data_overnight["local_end_date_time"]
battery_data_overnight_second["local_start_date_time"] = battery_data_overnight["local_start_date"].apply(lambda x: datetime.datetime.combine(x, datetime.time(23,59,59)))
battery_data_overnight_second["local_start_date"] = battery_data_overnight["local_end_date"]
battery_data_overnight = pd.concat([battery_data_overnight_first, battery_data_overnight_second])
# calculate battery_diff and time_diff
battery_data_overnight["time_diff"] = (battery_data_overnight["local_end_date_time"]-battery_data_overnight["local_start_date_time"]).apply(lambda x: x.total_seconds()/3600)
battery_data_overnight["battery_diff"] = battery_data_overnight["total_battery_diff"]*(battery_data_overnight["time_diff"]/battery_data_overnight["total_time_diff"])
del battery_data_overnight["total_battery_diff"], battery_data_overnight["total_time_diff"]
# filter out the rows when local_start_date + 1 < local_end_date
battery_data = battery_data[battery_data["local_start_date"] == battery_data["local_end_date"]]
# combine
battery_data = pd.concat([battery_data, battery_data_overnight])
# split into decrease table and charge table
battery_data_decrease = battery_data[battery_data["battery_diff"] > 0]
battery_data_charge = battery_data[battery_data["battery_diff"] <= 0]
# for battery_data_decrease:
battery_decrease_count = battery_data_decrease.groupby(["local_start_date"])["local_start_date"].count()
battery_data_decrease = battery_data_decrease.groupby(["local_start_date"]).sum()
battery_data_decrease["battery_decrease_count"] = battery_decrease_count
battery_data_decrease["battery_decrease_duration"] = battery_data_decrease["time_diff"]
battery_data_decrease["battery_consumption_rate"] = battery_data_decrease["battery_diff"]/battery_data_decrease["time_diff"]
del battery_data_decrease["battery_diff"], battery_data_decrease["time_diff"]
# for battery_data_charge:
battery_charge_count = battery_data_charge.groupby(["local_start_date"])["local_start_date"].count()
battery_data_charge = battery_data_charge.groupby(["local_start_date"]).sum()
battery_data_charge["battery_charge_count"] = battery_charge_count
battery_data_charge["battery_charge_duration"] = battery_data_charge["time_diff"]
del battery_data_charge["battery_diff"], battery_data_charge["time_diff"]
# combine decrease features and charge features
battery_features = pd.concat([battery_data_decrease, battery_data_charge], axis=1, sort=True)
battery_features["local_date"] = battery_features.index
battery_features.reset_index(inplace=True, drop=True)
battery_features.to_csv(snakemake.output[0], index=False)