# rapids/src/features/phone_battery/rapids/main.py

# 54 lines
# 3.5 KiB
# Python

import pandas as pd
from datetime import datetime, timedelta, time
def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute per-segment battery discharge/charge features from battery episodes.

    Consecutive rows are merged into one episode while ``battery_status`` stays
    the same and the gap between a row's ``start_timestamp`` and the previous
    row's ``end_timestamp`` is at most 1. Episodes with status 3 or 4 feed the
    discharge features; episodes with status 2 or 5 feed the charge features.

    Args:
        sensor_data_files: Mapping whose ``"sensor_episodes"`` entry is handed
            to ``pd.read_csv``.
        time_segment: Passed through unchanged to ``filter_data_by_segment``.
        provider: Mapping whose ``"FEATURES"`` entry lists the requested
            feature names; names this function does not know are ignored.
        filter_data_by_segment: Callable ``(data, time_segment)`` returning the
            rows to process. Its result must contain the columns
            ``local_segment``, ``battery_status``, ``battery_level``,
            ``start_timestamp``, ``end_timestamp`` and ``duration``.
        *args: Accepted and ignored.
        **kwargs: Accepted and ignored.

    Returns:
        A DataFrame with a ``local_segment`` column followed by one column per
        computed feature (discharge features first, then charge features).
        Missing values are filled with 0. When there is no data before or after
        filtering, an empty DataFrame with those same columns is returned.
    """
    # (feature name, source column, aggregation) in output-column order.
    discharge_specs = [
        ("countdischarge", "episode_id", "count"),
        ("sumdurationdischarge", "duration", "sum"),
        ("avgconsumptionrate", "battery_consumption_rate", "mean"),
        ("maxconsumptionrate", "battery_consumption_rate", "max"),
    ]
    charge_specs = [
        ("countcharge", "episode_id", "count"),
        ("sumdurationcharge", "duration", "sum"),
    ]

    battery_data = pd.read_csv(sensor_data_files["sensor_episodes"])

    # Filter the ordered specs instead of intersecting two sets: set iteration
    # order for strings changes between interpreter runs, which made the column
    # order of the empty result unpredictable and different from the non-empty one.
    requested_features = set(provider["FEATURES"])
    discharge_to_compute = [spec for spec in discharge_specs if spec[0] in requested_features]
    charge_to_compute = [spec for spec in charge_specs if spec[0] in requested_features]
    features_to_compute = [spec[0] for spec in discharge_to_compute + charge_to_compute]

    empty_features = pd.DataFrame(columns=["local_segment"] + features_to_compute)
    if battery_data.empty:
        return empty_features
    battery_data = filter_data_by_segment(battery_data, time_segment)
    if battery_data.empty:
        return empty_features

    # A new episode starts when the status changes or when the row does not
    # directly follow the previous one (gap greater than 1). The first row
    # always starts an episode because its status differs from the shifted NaN.
    status_changed = battery_data["battery_status"] != battery_data["battery_status"].shift()
    gap_after_previous = (battery_data["start_timestamp"] - battery_data["end_timestamp"].shift()) > 1
    # assign() works on a copy, so the frame returned by the caller's filter is not mutated.
    battery_data = battery_data.assign(episode_id=(status_changed | gap_after_previous).cumsum())

    grouped = battery_data.groupby(by=["local_segment", "episode_id", "battery_status"])
    battery_episodes = grouped[["duration"]].sum()
    # Positive when the level dropped over the episode (first reading minus last).
    battery_episodes["battery_diff"] = grouped["battery_level"].first() - grouped["battery_level"].last()
    # NOTE(review): an episode whose summed duration is 0 produces inf/NaN here,
    # which then flows into the avg/max features - confirm durations are > 0 upstream.
    battery_episodes["battery_consumption_rate"] = battery_episodes["battery_diff"] / battery_episodes["duration"]
    battery_episodes = battery_episodes.reset_index()

    discharge_episodes = battery_episodes[battery_episodes["battery_status"].isin([3, 4])]
    charge_episodes = battery_episodes[battery_episodes["battery_status"].isin([2, 5])]
    battery_discharge_features = _aggregate_by_segment(discharge_episodes, discharge_to_compute)
    battery_charge_features = _aggregate_by_segment(charge_episodes, charge_to_compute)

    # combine discharge features and charge features; fill the missing values with ZERO
    battery_features = pd.concat([battery_discharge_features, battery_charge_features], axis=1, sort=True).fillna(0)
    # The combined index can lose its name in the concat, so set it before
    # turning it into the "local_segment" column.
    battery_features.index.rename("local_segment", inplace=True)
    return battery_features.reset_index()


def _aggregate_by_segment(episodes, specs):
    """Return one column per (feature name, source column, aggregation) spec, indexed by local_segment."""
    by_segment = episodes.groupby(["local_segment"])
    features = pd.DataFrame()
    for feature_name, column, aggregation in specs:
        features[feature_name] = by_segment[column].agg(aggregation)
    return features