add stats features for bvp, eda, ibi, temp

feature/plugin_sentimental
Joe Kim 2021-02-11 20:56:27 -05:00 committed by JulioV
parent c6dc7e675a
commit 4469cfd6bb
8 changed files with 355 additions and 116 deletions

View File

@ -314,18 +314,19 @@ for provider in config["EMPATICA_HEARTRATE"]["PROVIDERS"].keys():
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
for provider in config["EMPATICA_TEMPERATURE"]["PROVIDERS"].keys():
if config["EMPATICA_TEMPERATURE"]["PROVIDERS"][provider]["COMPUTE"]:
for pid in config["PIDS"]:
suffixes = get_zip_suffixes(pid)
files_to_compute.extend(expand("data/raw/{pid}/empatica_temperature_unzipped_{suffix}.csv", pid=pid, suffix=suffixes))
files_to_compute.extend(expand("data/raw/{pid}/empatica_temperature_raw_{suffix}.csv", pid=pid, suffix=suffixes))
# files_to_compute.extend(expand("data/raw/{pid}/empatica_temperature_joined.csv", pid=config["PIDS"]))
# files_to_compute.extend(expand("data/raw/{pid}/empatica_temperature_with_datetime.csv", pid=config["PIDS"]))
# files_to_compute.extend(expand("data/interim/{pid}/empatica_temperature_features/empatica_temperature_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_TEMPERATURE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
# files_to_compute.extend(expand("data/processed/features/{pid}/empatica_temperature.csv", pid=config["PIDS"]))
# files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
# files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
files_to_compute.extend(expand("data/raw/{pid}/empatica_temperature_joined.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/empatica_temperature_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/empatica_temperature_features/empatica_temperature_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_TEMPERATURE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_temperature.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
for provider in config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"].keys():
if config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"][provider]["COMPUTE"]:
@ -333,12 +334,12 @@ for provider in config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"].keys():
suffixes = get_zip_suffixes(pid)
files_to_compute.extend(expand("data/raw/{pid}/empatica_electrodermal_activity_unzipped_{suffix}.csv", pid=pid, suffix=suffixes))
files_to_compute.extend(expand("data/raw/{pid}/empatica_electrodermal_activity_raw_{suffix}.csv", pid=pid, suffix=suffixes))
# files_to_compute.extend(expand("data/raw/{pid}/empatica_electrodermal_activity_joined.csv", pid=config["PIDS"]))
# files_to_compute.extend(expand("data/raw/{pid}/empatica_electrodermal_activity_with_datetime.csv", pid=config["PIDS"]))
# files_to_compute.extend(expand("data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
# files_to_compute.extend(expand("data/processed/features/{pid}/empatica_electrodermal_activity.csv", pid=config["PIDS"]))
# files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
# files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
files_to_compute.extend(expand("data/raw/{pid}/empatica_electrodermal_activity_joined.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/empatica_electrodermal_activity_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_electrodermal_activity.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
for provider in config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"].keys():
if config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"][provider]["COMPUTE"]:
@ -346,12 +347,12 @@ for provider in config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"].keys():
suffixes = get_zip_suffixes(pid)
files_to_compute.extend(expand("data/raw/{pid}/empatica_blood_volume_pulse_unzipped_{suffix}.csv", pid=pid, suffix=suffixes))
files_to_compute.extend(expand("data/raw/{pid}/empatica_blood_volume_pulse_raw_{suffix}.csv", pid=pid, suffix=suffixes))
# files_to_compute.extend(expand("data/raw/{pid}/empatica_blood_volume_pulse_joined.csv", pid=config["PIDS"]))
# files_to_compute.extend(expand("data/raw/{pid}/empatica_blood_volume_pulse_with_datetime.csv", pid=config["PIDS"]))
# files_to_compute.extend(expand("data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
# files_to_compute.extend(expand("data/processed/features/{pid}/empatica_blood_volume_pulse.csv", pid=config["PIDS"]))
# files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
# files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
files_to_compute.extend(expand("data/raw/{pid}/empatica_blood_volume_pulse_joined.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/empatica_blood_volume_pulse_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_blood_volume_pulse.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
for provider in config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"].keys():
if config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"][provider]["COMPUTE"]:
@ -359,12 +360,12 @@ for provider in config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"].keys():
suffixes = get_zip_suffixes(pid)
files_to_compute.extend(expand("data/raw/{pid}/empatica_inter_beat_interval_unzipped_{suffix}.csv", pid=pid, suffix=suffixes))
files_to_compute.extend(expand("data/raw/{pid}/empatica_inter_beat_interval_raw_{suffix}.csv", pid=pid, suffix=suffixes))
# files_to_compute.extend(expand("data/raw/{pid}/empatica_inter_beat_interval_joined.csv", pid=config["PIDS"]))
# files_to_compute.extend(expand("data/raw/{pid}/empatica_inter_beat_interval_with_datetime.csv", pid=config["PIDS"]))
# files_to_compute.extend(expand("data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
# files_to_compute.extend(expand("data/processed/features/{pid}/empatica_inter_beat_interval.csv", pid=config["PIDS"]))
# files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
# files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
files_to_compute.extend(expand("data/raw/{pid}/empatica_inter_beat_interval_joined.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/empatica_inter_beat_interval_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_inter_beat_interval.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
for provider in config["EMPATICA_TAGS"]["PROVIDERS"].keys():
if config["EMPATICA_TAGS"]["PROVIDERS"][provider]["COMPUTE"]:

View File

@ -446,7 +446,7 @@ EMPATICA_TEMPERATURE:
PROVIDERS:
DBDP:
COMPUTE: False
FEATURES: []
FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"]
SRC_FOLDER: "dbdp" # inside src/features/empatica_temperature
SRC_LANGUAGE: "python"
@ -455,7 +455,7 @@ EMPATICA_ELECTRODERMAL_ACTIVITY:
PROVIDERS:
DBDP:
COMPUTE: False
FEATURES: []
FEATURES: ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", "diffminmodeeda", "entropyeda"]
SRC_FOLDER: "dbdp" # inside src/features/empatica_electrodermal_activity
SRC_LANGUAGE: "python"
@ -464,7 +464,7 @@ EMPATICA_BLOOD_VOLUME_PULSE:
PROVIDERS:
DBDP:
COMPUTE: False
FEATURES: []
FEATURES: ["maxbvp", "minbvp", "avgbvp", "medianbvp", "modebvp", "stdbvp", "diffmaxmodebvp", "diffminmodebvp", "entropybvp"]
SRC_FOLDER: "dbdp" # inside src/features/empatica_blood_volume_pulse
SRC_LANGUAGE: "python"
@ -473,7 +473,7 @@ EMPATICA_INTER_BEAT_INTERVAL:
PROVIDERS:
DBDP:
COMPUTE: False
FEATURES: []
FEATURES: ["maxibi", "minibi", "avgibi", "medianibi", "modeibi", "stdibi", "diffmaxmodeibi", "diffminmodeibi", "entropyibi"]
SRC_FOLDER: "dbdp" # inside src/features/empatica_inter_beat_interval
SRC_LANGUAGE: "python"

View File

@ -14,6 +14,7 @@ local({
# signal that we're loading renv during R startup
Sys.setenv("RENV_R_INITIALIZING" = "true")
on.exit(Sys.unsetenv("RENV_R_INITIALIZING"), add = TRUE)
Sys.setenv("TZDIR" = file.path(R.home(), "share", "zoneinfo"))
# signal that we've consented to use renv
options(renv.consent = TRUE)

View File

@ -1,21 +1,76 @@
import pandas as pd
import numpy as np
from scipy.stats import entropy
def statsFeatures(bvp_data, features, bvp_features):
    """Add per-segment summary statistics of blood volume pulse to ``bvp_features``.

    Args:
        bvp_data: DataFrame with ``local_segment`` and ``blood_volume_pulse``
            columns.
        features: Collection of feature names to compute; names this function
            does not know are ignored.
        bvp_features: DataFrame that receives one column per computed feature
            (it is modified in place).

    Returns:
        ``bvp_features`` holding one value per ``local_segment`` for every
        computed feature.
    """
    col_name = "blood_volume_pulse"
    # Build the grouping once instead of once per requested feature.
    grouped = bvp_data[["local_segment", col_name]].groupby(["local_segment"])[col_name]

    def first_mode(values):
        # mode() is empty when every value of the segment is NaN; indexing it
        # unconditionally would raise, so such a segment yields NaN instead.
        modes = values.mode()
        return modes.iloc[0] if not modes.empty else float("nan")

    mode_dependent = ("modebvp", "diffmaxmodebvp", "diffminmodebvp")
    # The mode is the only expensive aggregate shared by several features.
    segment_mode = grouped.agg(first_mode) if any(name in features for name in mode_dependent) else None

    # Insertion order of this table fixes the column order of the result.
    calculators = {
        "sumbvp": grouped.sum,
        "maxbvp": grouped.max,
        "minbvp": grouped.min,
        "avgbvp": grouped.mean,
        "medianbvp": grouped.median,
        "modebvp": lambda: segment_mode,
        "stdbvp": grouped.std,
        "diffmaxmodebvp": lambda: grouped.max() - segment_mode,
        "diffminmodebvp": lambda: segment_mode - grouped.min(),
        # NOTE(review): scipy's entropy treats the raw values as an unnormalised
        # probability distribution; confirm that is meaningful for this signal.
        "entropybvp": lambda: grouped.agg(entropy),
    }
    for name, compute in calculators.items():
        if name in features:
            bvp_features[name] = compute()
    return bvp_features
def extractBVPFeaturesFromIntradayData(bvp_intraday_data, features, time_segment, filter_data_by_segment):
    """Compute the requested blood-volume-pulse features for one time segment.

    Args:
        bvp_intraday_data: DataFrame of raw readings.
        features: List of feature names to compute.
        time_segment: Passed through to ``filter_data_by_segment``.
        filter_data_by_segment: Callable invoked as
            ``filter_data_by_segment(data, time_segment)``; its result replaces
            the raw data.

    Returns:
        A DataFrame of features with the segment index reset to a column, or an
        empty frame with ``local_segment`` plus the feature names as columns
        when there is no data before or after filtering.
    """
    no_rows = pd.DataFrame(columns=["local_segment"] + features)
    if bvp_intraday_data.empty:
        return no_rows

    segment_rows = filter_data_by_segment(bvp_intraday_data, time_segment)
    if segment_rows.empty:
        return no_rows

    # get stats of bvp
    stats = statsFeatures(segment_rows, features, pd.DataFrame())
    stats.reset_index(inplace=True)
    return stats
def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute the DBDP blood-volume-pulse features for one time segment.

    Args:
        sensor_data_files: Mapping whose ``"sensor_data"`` entry is the CSV to
            read with pandas.
        time_segment: Passed through to ``filter_data_by_segment``.
        provider: Mapping whose ``"FEATURES"`` entry lists the requested
            feature names.
        filter_data_by_segment: Callable forwarded to the extraction helper.
        *args, **kwargs: Accepted and ignored.

    Returns:
        The DataFrame produced by ``extractBVPFeaturesFromIntradayData``.
    """
    # Read the CSV once; the leftover placeholder code read it a second time
    # and returned an empty frame before the computed features could be used.
    bvp_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])

    requested_intraday_features = provider["FEATURES"]
    # name of the features this function can compute
    base_intraday_features_names = ["maxbvp", "minbvp", "avgbvp", "medianbvp", "modebvp", "stdbvp", "diffmaxmodebvp",
                                    "diffminmodebvp", "entropybvp"]
    # the subset of requested features this function can compute, kept in the
    # order above so the output columns do not depend on set iteration order
    intraday_features_to_compute = [name for name in base_intraday_features_names
                                    if name in requested_intraday_features]

    # extract features from intraday data
    return extractBVPFeaturesFromIntradayData(bvp_intraday_data,
                                              intraday_features_to_compute, time_segment,
                                              filter_data_by_segment)

View File

@ -1,21 +1,76 @@
import pandas as pd
import numpy as np
from scipy.stats import entropy
def statsFeatures(eda_data, features, eda_features):
    """Add per-segment summary statistics of electrodermal activity to ``eda_features``.

    Args:
        eda_data: DataFrame with ``local_segment`` and
            ``electrodermal_activity`` columns.
        features: Collection of feature names to compute; names this function
            does not know are ignored.
        eda_features: DataFrame that receives one column per computed feature
            (it is modified in place).

    Returns:
        ``eda_features`` holding one value per ``local_segment`` for every
        computed feature.
    """
    col_name = "electrodermal_activity"
    # Build the grouping once instead of once per requested feature.
    grouped = eda_data[["local_segment", col_name]].groupby(["local_segment"])[col_name]

    def first_mode(values):
        # mode() is empty when every value of the segment is NaN; indexing it
        # unconditionally would raise, so such a segment yields NaN instead.
        modes = values.mode()
        return modes.iloc[0] if not modes.empty else float("nan")

    mode_dependent = ("modeeda", "diffmaxmodeeda", "diffminmodeeda")
    # The mode is the only expensive aggregate shared by several features.
    segment_mode = grouped.agg(first_mode) if any(name in features for name in mode_dependent) else None

    # Insertion order of this table fixes the column order of the result.
    calculators = {
        "sumeda": grouped.sum,
        "maxeda": grouped.max,
        "mineda": grouped.min,
        "avgeda": grouped.mean,
        "medianeda": grouped.median,
        "modeeda": lambda: segment_mode,
        "stdeda": grouped.std,
        "diffmaxmodeeda": lambda: grouped.max() - segment_mode,
        "diffminmodeeda": lambda: segment_mode - grouped.min(),
        # NOTE(review): scipy's entropy treats the raw values as an unnormalised
        # probability distribution; confirm that is the intended definition.
        "entropyeda": lambda: grouped.agg(entropy),
    }
    for name, compute in calculators.items():
        if name in features:
            eda_features[name] = compute()
    return eda_features
def extractEDAFeaturesFromIntradayData(eda_intraday_data, features, time_segment, filter_data_by_segment):
    """Compute the requested electrodermal-activity features for one time segment.

    Args:
        eda_intraday_data: DataFrame of raw readings.
        features: List of feature names to compute.
        time_segment: Passed through to ``filter_data_by_segment``.
        filter_data_by_segment: Callable invoked as
            ``filter_data_by_segment(data, time_segment)``; its result replaces
            the raw data.

    Returns:
        A DataFrame of features with the segment index reset to a column, or an
        empty frame with ``local_segment`` plus the feature names as columns
        when there is no data before or after filtering.
    """
    no_rows = pd.DataFrame(columns=["local_segment"] + features)
    if eda_intraday_data.empty:
        return no_rows

    segment_rows = filter_data_by_segment(eda_intraday_data, time_segment)
    if segment_rows.empty:
        return no_rows

    # get stats of eda
    stats = statsFeatures(segment_rows, features, pd.DataFrame())
    stats.reset_index(inplace=True)
    return stats
def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute the DBDP electrodermal-activity features for one time segment.

    Args:
        sensor_data_files: Mapping whose ``"sensor_data"`` entry is the CSV to
            read with pandas.
        time_segment: Passed through to ``filter_data_by_segment``.
        provider: Mapping whose ``"FEATURES"`` entry lists the requested
            feature names.
        filter_data_by_segment: Callable forwarded to the extraction helper.
        *args, **kwargs: Accepted and ignored.

    Returns:
        The DataFrame produced by ``extractEDAFeaturesFromIntradayData``.
    """
    # Read the CSV once; the leftover placeholder code read it a second time
    # and returned an empty frame before the computed features could be used.
    eda_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])

    requested_intraday_features = provider["FEATURES"]
    # name of the features this function can compute
    base_intraday_features_names = ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda",
                                    "diffminmodeeda", "entropyeda"]
    # the subset of requested features this function can compute, kept in the
    # order above so the output columns do not depend on set iteration order
    intraday_features_to_compute = [name for name in base_intraday_features_names
                                    if name in requested_intraday_features]

    # extract features from intraday data
    return extractEDAFeaturesFromIntradayData(eda_intraday_data,
                                              intraday_features_to_compute, time_segment,
                                              filter_data_by_segment)

View File

@ -1,32 +1,47 @@
import pandas as pd
from scipy.stats import entropy
def statsFeatures(heartrate_data, features, heartrate_features):
    """Add per-segment summary statistics of heart rate to ``heartrate_features``.

    Args:
        heartrate_data: DataFrame with ``local_segment`` and ``heartrate``
            columns.
        features: Collection of feature names to compute; names this function
            does not know are ignored.
        heartrate_features: DataFrame that receives one column per computed
            feature (it is modified in place).

    Returns:
        ``heartrate_features`` holding one value per ``local_segment`` for
        every computed feature.
    """
    col_name = "heartrate"
    # Build the grouping once; every statistic is computed a single time
    # (the previous text repeated each assignment twice).
    grouped = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[col_name]

    def first_mode(values):
        # mode() is empty when every value of the segment is NaN; indexing it
        # unconditionally would raise, so such a segment yields NaN instead.
        modes = values.mode()
        return modes.iloc[0] if not modes.empty else float("nan")

    mode_dependent = ("modehr", "diffmaxmodehr", "diffminmodehr")
    # The mode is the only expensive aggregate shared by several features.
    segment_mode = grouped.agg(first_mode) if any(name in features for name in mode_dependent) else None

    # Insertion order of this table fixes the column order of the result.
    calculators = {
        "sumhr": grouped.sum,
        "maxhr": grouped.max,
        "minhr": grouped.min,
        "avghr": grouped.mean,
        "medianhr": grouped.median,
        "modehr": lambda: segment_mode,
        "stdhr": grouped.std,
        "diffmaxmodehr": lambda: grouped.max() - segment_mode,
        "diffminmodehr": lambda: segment_mode - grouped.min(),
        # NOTE(review): scipy's entropy treats the raw values as an unnormalised
        # probability distribution; confirm that is the intended definition.
        "entropyhr": lambda: grouped.agg(entropy),
    }
    for name, compute in calculators.items():
        if name in features:
            heartrate_features[name] = compute()
    return heartrate_features
def extractHRFeaturesFromIntradayData(heartrate_intraday_data, features, time_segment, filter_data_by_segment):
heartrate_intraday_features = pd.DataFrame(columns=["local_segment"] + features)
if not heartrate_intraday_data.empty:
@ -44,16 +59,18 @@ def extractHRFeaturesFromIntradayData(heartrate_intraday_data, features, time_se
def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute the DBDP heart-rate features for one time segment.

    Args:
        sensor_data_files: Mapping whose ``"sensor_data"`` entry is the CSV to
            read with pandas.
        time_segment: Passed through to ``filter_data_by_segment``.
        provider: Mapping whose ``"FEATURES"`` entry lists the requested
            feature names.
        filter_data_by_segment: Callable forwarded to the extraction helper.
        *args, **kwargs: Accepted and ignored.

    Returns:
        The DataFrame produced by ``extractHRFeaturesFromIntradayData``.
    """
    heartrate_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])

    requested_intraday_features = provider["FEATURES"]
    # name of the features this function can compute
    base_intraday_features_names = ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr",
                                    "diffminmodehr", "entropyhr"]
    # the subset of requested features this function can compute, kept in the
    # order above so the output columns do not depend on set iteration order
    intraday_features_to_compute = [name for name in base_intraday_features_names
                                    if name in requested_intraday_features]

    # extract features from intraday data (once; the call used to be repeated)
    return extractHRFeaturesFromIntradayData(heartrate_intraday_data,
                                             intraday_features_to_compute, time_segment,
                                             filter_data_by_segment)

View File

@ -1,21 +1,76 @@
import pandas as pd
import numpy as np
from scipy.stats import entropy
def statsFeatures(ibi_data, features, ibi_features):
    """Add per-segment summary statistics of inter-beat interval to ``ibi_features``.

    Args:
        ibi_data: DataFrame with ``local_segment`` and ``inter_beat_interval``
            columns.
        features: Collection of feature names to compute; names this function
            does not know are ignored.
        ibi_features: DataFrame that receives one column per computed feature
            (it is modified in place).

    Returns:
        ``ibi_features`` holding one value per ``local_segment`` for every
        computed feature.
    """
    col_name = "inter_beat_interval"
    # Build the grouping once instead of once per requested feature.
    grouped = ibi_data[["local_segment", col_name]].groupby(["local_segment"])[col_name]

    def first_mode(values):
        # mode() is empty when every value of the segment is NaN; indexing it
        # unconditionally would raise, so such a segment yields NaN instead.
        modes = values.mode()
        return modes.iloc[0] if not modes.empty else float("nan")

    mode_dependent = ("modeibi", "diffmaxmodeibi", "diffminmodeibi")
    # The mode is the only expensive aggregate shared by several features.
    segment_mode = grouped.agg(first_mode) if any(name in features for name in mode_dependent) else None

    # Insertion order of this table fixes the column order of the result.
    calculators = {
        "sumibi": grouped.sum,
        "maxibi": grouped.max,
        "minibi": grouped.min,
        "avgibi": grouped.mean,
        "medianibi": grouped.median,
        "modeibi": lambda: segment_mode,
        "stdibi": grouped.std,
        "diffmaxmodeibi": lambda: grouped.max() - segment_mode,
        "diffminmodeibi": lambda: segment_mode - grouped.min(),
        # NOTE(review): scipy's entropy treats the raw values as an unnormalised
        # probability distribution; confirm that is the intended definition.
        "entropyibi": lambda: grouped.agg(entropy),
    }
    for name, compute in calculators.items():
        if name in features:
            ibi_features[name] = compute()
    return ibi_features
def extractIBIFeaturesFromIntradayData(ibi_intraday_data, features, time_segment, filter_data_by_segment):
    """Compute the requested inter-beat-interval features for one time segment.

    Args:
        ibi_intraday_data: DataFrame of raw readings.
        features: List of feature names to compute.
        time_segment: Passed through to ``filter_data_by_segment``.
        filter_data_by_segment: Callable invoked as
            ``filter_data_by_segment(data, time_segment)``; its result replaces
            the raw data.

    Returns:
        A DataFrame of features with the segment index reset to a column, or an
        empty frame with ``local_segment`` plus the feature names as columns
        when there is no data before or after filtering.
    """
    no_rows = pd.DataFrame(columns=["local_segment"] + features)
    if ibi_intraday_data.empty:
        return no_rows

    segment_rows = filter_data_by_segment(ibi_intraday_data, time_segment)
    if segment_rows.empty:
        return no_rows

    # get stats of ibi
    stats = statsFeatures(segment_rows, features, pd.DataFrame())
    stats.reset_index(inplace=True)
    return stats
def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute the DBDP inter-beat-interval features for one time segment.

    Args:
        sensor_data_files: Mapping whose ``"sensor_data"`` entry is the CSV to
            read with pandas.
        time_segment: Passed through to ``filter_data_by_segment``.
        provider: Mapping whose ``"FEATURES"`` entry lists the requested
            feature names.
        filter_data_by_segment: Callable forwarded to the extraction helper.
        *args, **kwargs: Accepted and ignored.

    Returns:
        The DataFrame produced by ``extractIBIFeaturesFromIntradayData``.
    """
    # Read the CSV once; the leftover placeholder code read it a second time
    # and returned an empty frame before the computed features could be used.
    ibi_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])

    requested_intraday_features = provider["FEATURES"]
    # name of the features this function can compute
    base_intraday_features_names = ["maxibi", "minibi", "avgibi", "medianibi", "modeibi", "stdibi", "diffmaxmodeibi",
                                    "diffminmodeibi", "entropyibi"]
    # the subset of requested features this function can compute, kept in the
    # order above so the output columns do not depend on set iteration order
    intraday_features_to_compute = [name for name in base_intraday_features_names
                                    if name in requested_intraday_features]

    # extract features from intraday data
    return extractIBIFeaturesFromIntradayData(ibi_intraday_data,
                                              intraday_features_to_compute, time_segment,
                                              filter_data_by_segment)

View File

@ -1,21 +1,76 @@
import pandas as pd
import numpy as np
from scipy.stats import entropy
def statsFeatures(temperature_data, features, temperature_features):
    """Add per-segment summary statistics of temperature to ``temperature_features``.

    Args:
        temperature_data: DataFrame with ``local_segment`` and ``temperature``
            columns.
        features: Collection of feature names to compute; names this function
            does not know are ignored.
        temperature_features: DataFrame that receives one column per computed
            feature (it is modified in place).

    Returns:
        ``temperature_features`` holding one value per ``local_segment`` for
        every computed feature.
    """
    col_name = "temperature"
    # Build the grouping once instead of once per requested feature.
    grouped = temperature_data[["local_segment", col_name]].groupby(["local_segment"])[col_name]

    def first_mode(values):
        # mode() is empty when every value of the segment is NaN; indexing it
        # unconditionally would raise, so such a segment yields NaN instead.
        modes = values.mode()
        return modes.iloc[0] if not modes.empty else float("nan")

    mode_dependent = ("modetemp", "diffmaxmodetemp", "diffminmodetemp")
    # The mode is the only expensive aggregate shared by several features.
    segment_mode = grouped.agg(first_mode) if any(name in features for name in mode_dependent) else None

    # Insertion order of this table fixes the column order of the result.
    calculators = {
        "sumtemp": grouped.sum,
        "maxtemp": grouped.max,
        "mintemp": grouped.min,
        "avgtemp": grouped.mean,
        "mediantemp": grouped.median,
        "modetemp": lambda: segment_mode,
        "stdtemp": grouped.std,
        "diffmaxmodetemp": lambda: grouped.max() - segment_mode,
        "diffminmodetemp": lambda: segment_mode - grouped.min(),
        # NOTE(review): scipy's entropy treats the raw values as an unnormalised
        # probability distribution; confirm that is the intended definition.
        "entropytemp": lambda: grouped.agg(entropy),
    }
    for name, compute in calculators.items():
        if name in features:
            temperature_features[name] = compute()
    return temperature_features
def extractTempFeaturesFromIntradayData(temperature_intraday_data, features, time_segment, filter_data_by_segment):
    """Compute the requested temperature features for one time segment.

    Args:
        temperature_intraday_data: DataFrame of raw readings.
        features: List of feature names to compute.
        time_segment: Passed through to ``filter_data_by_segment``.
        filter_data_by_segment: Callable invoked as
            ``filter_data_by_segment(data, time_segment)``; its result replaces
            the raw data.

    Returns:
        A DataFrame of features with the segment index reset to a column, or an
        empty frame with ``local_segment`` plus the feature names as columns
        when there is no data before or after filtering.
    """
    no_rows = pd.DataFrame(columns=["local_segment"] + features)
    if temperature_intraday_data.empty:
        return no_rows

    segment_rows = filter_data_by_segment(temperature_intraday_data, time_segment)
    if segment_rows.empty:
        return no_rows

    # get stats of temperature
    stats = statsFeatures(segment_rows, features, pd.DataFrame())
    stats.reset_index(inplace=True)
    return stats
def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute the DBDP temperature features for one time segment.

    Args:
        sensor_data_files: Mapping whose ``"sensor_data"`` entry is the CSV to
            read with pandas.
        time_segment: Passed through to ``filter_data_by_segment``.
        provider: Mapping whose ``"FEATURES"`` entry lists the requested
            feature names.
        filter_data_by_segment: Callable forwarded to the extraction helper.
        *args, **kwargs: Accepted and ignored.

    Returns:
        The DataFrame produced by ``extractTempFeaturesFromIntradayData``.
    """
    # Read the CSV once; the leftover placeholder code read it a second time
    # and returned an empty frame before the computed features could be used.
    temperature_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])

    requested_intraday_features = provider["FEATURES"]
    # name of the features this function can compute
    base_intraday_features_names = ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp",
                                    "diffmaxmodetemp", "diffminmodetemp", "entropytemp"]
    # the subset of requested features this function can compute, kept in the
    # order above so the output columns do not depend on set iteration order
    intraday_features_to_compute = [name for name in base_intraday_features_names
                                    if name in requested_intraday_features]

    # extract features from intraday data
    return extractTempFeaturesFromIntradayData(temperature_intraday_data,
                                               intraday_features_to_compute, time_segment,
                                               filter_data_by_segment)