rapids/src/features/phone_activity_recognition/rapids/main.py

117 lines
5.0 KiB
Python
Raw Normal View History

import pandas as pd
import numpy as np
def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute activity recognition features for every segment with episodes.

    Parameters
    ----------
    sensor_data_files : mapping
        ``sensor_data_files["sensor_episodes"]`` is handed to ``pd.read_csv``.
        The code below reads the columns ``episode_id``, ``activity_type``,
        ``activity_name`` and ``duration``.
    day_segment
        Forwarded unchanged as the second argument of
        ``filter_data_by_segment``.
    provider : mapping
        ``provider["FEATURES"]`` lists the requested feature names and
        ``provider["ACTIVITY_CLASSES"]`` maps a class name (e.g.
        ``"STATIONARY"``) to the ``activity_name`` labels that belong to it.
        A class only yields a column when ``"duration" + name.lower()`` is
        both requested and one of the supported feature names.
    filter_data_by_segment : callable
        Called as ``filter_data_by_segment(episodes, day_segment)``. The frame
        it returns must carry a ``local_segment`` column, which is used as the
        grouping key.
    *args, **kwargs
        Accepted for interface compatibility and ignored.

    Returns
    -------
    pandas.DataFrame
        One row per ``local_segment`` with one ``ar_rapids_<feature>`` column
        per computed feature. When there are no episodes (before or after
        filtering) an empty frame with the expected columns is returned.
        Duration columns hold 0 for segments that have no episode of the
        corresponding class.
    """
    ar_episodes = pd.read_csv(sensor_data_files["sensor_episodes"])
    activity_classes = provider["ACTIVITY_CLASSES"]

    # name of the features this function can compute
    base_features_names = [
        "count",
        "mostcommonactivity",
        "countuniqueactivities",
        "durationstationary",
        "durationmobile",
        "durationvehicle",
    ]
    # The subset of requested features this function can compute. Filtering the
    # canonical list (instead of intersecting two sets) keeps the column order
    # of the empty result stable across interpreter runs.
    requested_features = provider["FEATURES"]
    features_to_compute = [name for name in base_features_names if name in requested_features]

    ar_features = pd.DataFrame(columns=["local_segment"] + ["ar_rapids_" + x for x in features_to_compute])
    if ar_episodes.empty:
        return ar_features

    ar_episodes = filter_data_by_segment(ar_episodes, day_segment)
    if ar_episodes.empty:
        return ar_features

    by_segment = ar_episodes.groupby("local_segment")
    # Index the result by every segment up front. Otherwise the first assigned
    # column would decide which segments survive, silently dropping segments
    # that lack data for that particular feature.
    ar_features = pd.DataFrame(index=by_segment.size().index)

    if "count" in features_to_compute:
        ar_features["ar_rapids_count"] = by_segment["episode_id"].count()
    if "mostcommonactivity" in features_to_compute:
        # mode() may return several values on ties; the first one is kept.
        ar_features["ar_rapids_mostcommonactivity"] = by_segment["activity_type"].agg(lambda x: pd.Series.mode(x)[0])
    if "countuniqueactivities" in features_to_compute:
        ar_features["ar_rapids_countuniqueactivities"] = by_segment["activity_type"].nunique()

    # duration features
    for column, activity_labels in activity_classes.items():
        feature = "duration" + column.lower()
        if feature not in features_to_compute:
            continue
        class_episodes = ar_episodes[ar_episodes["activity_name"].isin(pd.Series(activity_labels))]
        if class_episodes.empty:
            ar_features["ar_rapids_" + feature] = 0
        else:
            totals = class_episodes.groupby("local_segment")["duration"].sum()
            # Segments without any episode of this class get 0 rather than the
            # NaN that plain index alignment would introduce.
            ar_features["ar_rapids_" + feature] = totals.reindex(ar_features.index, fill_value=0)

    ar_features.index.names = ["local_segment"]
    ar_features = ar_features.reset_index()
    return ar_features