2020-10-08 00:11:06 +02:00
|
|
|
import pandas as pd
|
|
|
|
import numpy as np
|
|
|
|
|
|
|
|
def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute per-segment activity recognition features from episode data.

    Parameters
    ----------
    sensor_data_files : mapping
        Its "sensor_episodes" entry is handed to ``pd.read_csv``. After
        filtering, the columns read from the episodes are "local_segment",
        "episode_id", "activity_type", "activity_name" and "duration".
    day_segment :
        Passed through unchanged to ``filter_data_by_segment``.
    provider : mapping
        "FEATURES" lists the requested feature names. "ACTIVITY_CLASSES" maps
        a class name to the "activity_name" labels that belong to it; the
        lowercased class name selects the matching "duration<class>" feature.
    filter_data_by_segment : callable
        Called as ``filter_data_by_segment(episodes, day_segment)``; its
        result is grouped by "local_segment".
    *args, **kwargs :
        Accepted and ignored.

    Returns
    -------
    pandas.DataFrame
        One row per segment present in the filtered episodes, with a
        "local_segment" column followed by one "ar_rapids_<feature>" column
        per computed feature. If there are no episodes (before or after
        filtering) an empty frame with those columns is returned. A duration
        feature is 0 for segments without any episode of that class.
    """
    ar_episodes = pd.read_csv(sensor_data_files["sensor_episodes"])
    activity_classes = provider["ACTIVITY_CLASSES"]

    # name of the features this function can compute
    base_features_names = ["count", "mostcommonactivity", "countuniqueactivities", "durationstationary", "durationmobile", "durationvehicle"]
    # the subset of requested features this function can compute, kept in the
    # canonical order above so the column order does not depend on set hashing
    requested_features = set(provider["FEATURES"])
    features_to_compute = [name for name in base_features_names if name in requested_features]

    ar_features = pd.DataFrame(columns=["local_segment"] + ["ar_rapids_" + x for x in features_to_compute])
    if ar_episodes.empty:
        return ar_features

    ar_episodes = filter_data_by_segment(ar_episodes, day_segment)
    if ar_episodes.empty:
        return ar_features

    by_segment = ar_episodes.groupby(["local_segment"])
    # Build the result on the complete set of segments up front. Otherwise the
    # first feature assigned decides which rows exist, and segments missing
    # from that feature are silently dropped.
    segments = by_segment.size().index
    ar_features = pd.DataFrame(index=segments)

    if "count" in features_to_compute:
        ar_features["ar_rapids_count"] = by_segment["episode_id"].count()
    if "mostcommonactivity" in features_to_compute:
        # mode() returns its values sorted, so ties resolve to the smallest one
        ar_features["ar_rapids_mostcommonactivity"] = by_segment["activity_type"].agg(lambda x: pd.Series.mode(x)[0])
    if "countuniqueactivities" in features_to_compute:
        ar_features["ar_rapids_countuniqueactivities"] = by_segment["activity_type"].nunique()

    # duration features
    for column, activity_labels in activity_classes.items():
        feature = "duration" + column.lower()
        if feature not in features_to_compute:
            continue
        in_class = ar_episodes["activity_name"].isin(pd.Series(activity_labels))
        durations = ar_episodes[in_class].groupby(["local_segment"])["duration"].sum()
        # Segments without any episode of this class are absent from
        # `durations`; reindexing fills them with 0 instead of leaving NaN.
        ar_features["ar_rapids_" + feature] = durations.reindex(segments, fill_value=0)

    ar_features.index.names = ["local_segment"]
    ar_features = ar_features.reset_index()

    return ar_features
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
if not ar_data.empty:
|
|
|
|
ar_data = filter_data_by_segment(ar_data, day_segment)
|
|
|
|
|
|
|
|
if not ar_data.empty:
|
|
|
|
# chunk_episodes
|
|
|
|
ar_data = chunk_episodes(ar_data)
|
|
|
|
|
|
|
|
if not ar_data.empty:
|
|
|
|
|
|
|
|
ar_data["episode_id"] = ((ar_data.ar_status != ar_data.ar_status.shift()) | (ar_data.start_timestamp - ar_data.end_timestamp.shift() > 1)).cumsum()
|
|
|
|
grouped = ar_data.groupby(by=["local_segment", "episode_id", "ar_status"])
|
|
|
|
ar_episodes= grouped[["duration"]].sum()
|
|
|
|
ar_episodes["ar_diff"] = grouped["ar_level"].first() - grouped["ar_level"].last()
|
|
|
|
ar_episodes["ar_consumption_rate"] = ar_episodes["ar_diff"] / ar_episodes["duration"]
|
|
|
|
ar_episodes.reset_index(inplace=True)
|
|
|
|
|
|
|
|
# for discharge episodes
|
|
|
|
ar_discharge_episodes = ar_episodes[(ar_episodes["ar_status"] == 3) | (ar_episodes["ar_status"] == 4)]
|
|
|
|
ar_discharge_features = pd.DataFrame()
|
|
|
|
if "countdischarge" in features_to_compute:
|
|
|
|
ar_discharge_features["ar_rapids_countdischarge"] = ar_discharge_episodes.groupby(["local_segment"])["episode_id"].count()
|
|
|
|
if "sumdurationdischarge" in features_to_compute:
|
|
|
|
ar_discharge_features["ar_rapids_sumdurationdischarge"] = ar_discharge_episodes.groupby(["local_segment"])["duration"].sum()
|
|
|
|
if "avgconsumptionrate" in features_to_compute:
|
|
|
|
ar_discharge_features["ar_rapids_avgconsumptionrate"] = ar_discharge_episodes.groupby(["local_segment"])["ar_consumption_rate"].mean()
|
|
|
|
if "maxconsumptionrate" in features_to_compute:
|
|
|
|
ar_discharge_features["ar_rapids_maxconsumptionrate"] = ar_discharge_episodes.groupby(["local_segment"])["ar_consumption_rate"].max()
|
|
|
|
|
|
|
|
# for charge episodes
|
|
|
|
ar_charge_episodes = ar_episodes[(ar_episodes["ar_status"] == 2) | (ar_episodes["ar_status"] == 5)]
|
|
|
|
ar_charge_features = pd.DataFrame()
|
|
|
|
if "countcharge" in features_to_compute:
|
|
|
|
ar_charge_features["ar_rapids_countcharge"] = ar_charge_episodes.groupby(["local_segment"])["episode_id"].count()
|
|
|
|
if "sumdurationcharge" in features_to_compute:
|
|
|
|
ar_charge_features["ar_rapids_sumdurationcharge"] = ar_charge_episodes.groupby(["local_segment"])["duration"].sum()
|
|
|
|
|
|
|
|
# combine discharge features and charge features; fill the missing values with ZERO
|
|
|
|
ar_features = pd.concat([ar_discharge_features, ar_charge_features], axis=1, sort=True).fillna(0)
|
|
|
|
|
|
|
|
ar_features.index.rename("local_segment", inplace=True)
|
|
|
|
ar_features = ar_features.reset_index()
|
|
|
|
|
|
|
|
return ar_features
|
|
|
|
"""
|