2020-10-27 22:55:57 +01:00
|
|
|
import pandas as pd
|
|
|
|
from scipy.stats import entropy
|
|
|
|
|
2020-10-28 22:02:21 +01:00
|
|
|
def statsFeatures(heartrate_data, features, features_type, heartrate_features):
    """Add per-segment summary statistics of one heart-rate column.

    For every statistic ``<stat>`` whose name ``"intraday" + <stat> +
    features_type`` appears in ``features``, a column named
    ``"heartrate_rapids_intraday" + <stat> + features_type`` is written into
    ``heartrate_features``. The statistic is computed over the rows of
    ``heartrate_data`` grouped by ``local_segment``.

    Args:
        heartrate_data: DataFrame holding a ``local_segment`` column and the
            value column selected by ``features_type``.
        features: Collection of requested feature names.
        features_type: One of ``"hr"``, ``"restinghr"``,
            ``"caloriesoutofrange"``, ``"caloriesfatburn"``,
            ``"caloriescardio"`` or ``"caloriespeak"``; selects the value
            column to summarise.
        heartrate_features: DataFrame that receives the new columns. It is
            modified in place and also returned.

    Returns:
        ``heartrate_features`` with the requested columns added.

    Raises:
        ValueError: If ``features_type`` is not one of the supported values.
    """
    column_by_type = {
        "hr": "heartrate",
        "restinghr": "heartrate_daily_restinghr",
        "caloriesoutofrange": "heartrate_daily_caloriesoutofrange",
        "caloriesfatburn": "heartrate_daily_caloriesfatburn",
        "caloriescardio": "heartrate_daily_caloriescardio",
        "caloriespeak": "heartrate_daily_caloriespeak",
    }
    if features_type not in column_by_type:
        raise ValueError("features_type can only be one of ['hr', 'restinghr', 'caloriesoutofrange', 'caloriesfatburn', 'caloriescardio', 'caloriespeak'].")
    col_name = column_by_type[features_type]

    # Statistic name -> aggregation over the per-segment grouped column.
    # Insertion order fixes the order in which output columns are created.
    aggregators = {
        "sum": lambda grouped: grouped.sum(),
        "max": lambda grouped: grouped.max(),
        "min": lambda grouped: grouped.min(),
        "avg": lambda grouped: grouped.mean(),
        "median": lambda grouped: grouped.median(),
        "mode": lambda grouped: grouped.agg(_first_mode),
        "std": lambda grouped: grouped.std(),
        "diffmaxmode": lambda grouped: grouped.max() - grouped.agg(_first_mode),
        "diffminmode": lambda grouped: grouped.agg(_first_mode) - grouped.min(),
        # NOTE(review): scipy.stats.entropy receives the raw values of each
        # segment, not a histogram of them -- confirm this is intended.
        "entropy": lambda grouped: grouped.agg(entropy),
    }

    requested = [stat for stat in aggregators if "intraday" + stat + features_type in features]
    if not requested:
        # Nothing to compute: leave the data untouched, so a missing value
        # column is not an error when no statistic was asked for.
        return heartrate_features

    # Build the grouping once and reuse it for every requested statistic.
    grouped = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[col_name]
    for stat in requested:
        heartrate_features["heartrate_rapids_intraday" + stat + features_type] = aggregators[stat](grouped)

    return heartrate_features


def _first_mode(values):
    """Return the first mode of ``values``, or NaN when there is none.

    ``Series.mode`` drops nulls by default, so a segment whose values are all
    null yields an empty result; indexing that result with ``[0]`` would raise.
    """
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else float("nan")
|
|
|
|
|
2020-10-27 22:55:57 +01:00
|
|
|
def extractHRFeaturesFromIntradayData(heartrate_intraday_data, features, day_segment, filter_data_by_segment):
    """Compute per-segment heart-rate features from intraday data.

    Args:
        heartrate_intraday_data: DataFrame of intraday rows. The code reads
            the columns ``local_date``, ``local_hour``, ``local_minute``,
            ``device_id`` and ``heartrate_zone``, plus ``local_segment`` after
            filtering.
        features: Collection of feature names to compute (``"intraday..."``).
        day_segment: Segment identifier handed to ``filter_data_by_segment``.
        filter_data_by_segment: Callable invoked as
            ``filter_data_by_segment(data, day_segment)``; its result is
            expected to carry a ``local_segment`` column (assumption based on
            how the result is used here -- confirm against the caller).

    Returns:
        A DataFrame with a ``local_segment`` column and one
        ``heartrate_rapids_<feature>`` column per computed feature. When the
        input is empty, before or after filtering, the frame has the expected
        columns and no rows.
    """
    # Requested zone feature -> value it matches in the ``heartrate_zone`` column.
    zone_by_feature = {
        "intradayminutesonoutofrangezone": "outofrange",
        "intradayminutesonfatburnzone": "fatburn",
        "intradayminutesoncardiozone": "cardio",
        "intradayminutesonpeakzone": "peak",
    }

    empty_features = pd.DataFrame(columns=["local_segment"] + ["heartrate_rapids_" + x for x in features])
    if heartrate_intraday_data.empty:
        return empty_features

    # Average number of rows recorded per minute, measured on the unfiltered
    # data; used below to turn row counts into minutes.
    num_rows_per_minute = heartrate_intraday_data.groupby(["local_date", "local_hour", "local_minute"])["device_id"].count().mean()

    heartrate_intraday_data = filter_data_by_segment(heartrate_intraday_data, day_segment)
    if heartrate_intraday_data.empty:
        return empty_features

    # Seed the result with every segment present in the data. Starting from an
    # index-less frame lets the first assigned column decide which segments
    # exist, which silently drops segments that have no rows in that zone.
    segments = heartrate_intraday_data.groupby(["local_segment"]).size().index
    heartrate_intraday_features = pd.DataFrame(index=segments)

    # get stats of heartrate
    heartrate_intraday_features = statsFeatures(heartrate_intraday_data, features, "hr", heartrate_intraday_features)

    # get number of minutes in each heart rate zone (fixed order keeps the
    # output column order stable between runs)
    for feature_name, zone in zone_by_feature.items():
        if feature_name not in features:
            continue
        column = "heartrate_rapids_" + feature_name
        heartrate_zone = heartrate_intraday_data[heartrate_intraday_data["heartrate_zone"] == zone]
        heartrate_intraday_features[column] = heartrate_zone.groupby(["local_segment"])["device_id"].count() / num_rows_per_minute
        # Segments without any row in this zone spent zero minutes in it.
        heartrate_intraday_features[column] = heartrate_intraday_features[column].fillna(0)

    heartrate_intraday_features.reset_index(inplace=True)
    return heartrate_intraday_features
|
|
|
|
|
|
|
|
|
|
|
|
def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Entry point: load intraday heart-rate data and extract the requested features.

    Args:
        sensor_data_files: Mapping whose ``"sensor_data"`` entry is the CSV
            path (or buffer) passed to ``pandas.read_csv``.
        day_segment: Segment identifier forwarded to the extraction step.
        provider: Mapping whose ``"FEATURES"`` entry lists the requested
            feature names without the ``"intraday"`` prefix.
        filter_data_by_segment: Callable forwarded to the extraction step.
        *args: Accepted for interface compatibility; unused here.
        **kwargs: Accepted for interface compatibility; unused here.

    Returns:
        The DataFrame produced by ``extractHRFeaturesFromIntradayData``.
    """
    heartrate_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])

    requested_intraday_features = {"intraday" + x for x in provider["FEATURES"]}

    # name of the features this function can compute
    base_intraday_features_names = [
        "intradaymaxhr",
        "intradayminhr",
        "intradayavghr",
        "intradaymedianhr",
        "intradaymodehr",
        "intradaystdhr",
        "intradaydiffmaxmodehr",
        "intradaydiffminmodehr",
        "intradayentropyhr",
        "intradayminutesonoutofrangezone",
        "intradayminutesonfatburnzone",
        "intradayminutesoncardiozone",
        "intradayminutesonpeakzone",
    ]

    # the subset of requested features this function can compute, kept in the
    # declared order so the output column order does not vary between runs
    # (intersecting two sets yields an arbitrary order)
    intraday_features_to_compute = [name for name in base_intraday_features_names if name in requested_intraday_features]

    # extract features from intraday data
    return extractHRFeaturesFromIntradayData(heartrate_intraday_data, intraday_features_to_compute, day_segment, filter_data_by_segment)
|