add stats features for bvp, eda, ibi, temp
parent
c6dc7e675a
commit
4469cfd6bb
49
Snakefile
49
Snakefile
|
@ -314,18 +314,19 @@ for provider in config["EMPATICA_HEARTRATE"]["PROVIDERS"].keys():
|
|||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
|
||||
for provider in config["EMPATICA_TEMPERATURE"]["PROVIDERS"].keys():
|
||||
if config["EMPATICA_TEMPERATURE"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
for pid in config["PIDS"]:
|
||||
suffixes = get_zip_suffixes(pid)
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_temperature_unzipped_{suffix}.csv", pid=pid, suffix=suffixes))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_temperature_raw_{suffix}.csv", pid=pid, suffix=suffixes))
|
||||
# files_to_compute.extend(expand("data/raw/{pid}/empatica_temperature_joined.csv", pid=config["PIDS"]))
|
||||
# files_to_compute.extend(expand("data/raw/{pid}/empatica_temperature_with_datetime.csv", pid=config["PIDS"]))
|
||||
# files_to_compute.extend(expand("data/interim/{pid}/empatica_temperature_features/empatica_temperature_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_TEMPERATURE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
# files_to_compute.extend(expand("data/processed/features/{pid}/empatica_temperature.csv", pid=config["PIDS"]))
|
||||
# files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
# files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_temperature_joined.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_temperature_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/empatica_temperature_features/empatica_temperature_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_TEMPERATURE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_temperature.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
for provider in config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"].keys():
|
||||
if config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
|
@ -333,12 +334,12 @@ for provider in config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"].keys():
|
|||
suffixes = get_zip_suffixes(pid)
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_electrodermal_activity_unzipped_{suffix}.csv", pid=pid, suffix=suffixes))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_electrodermal_activity_raw_{suffix}.csv", pid=pid, suffix=suffixes))
|
||||
# files_to_compute.extend(expand("data/raw/{pid}/empatica_electrodermal_activity_joined.csv", pid=config["PIDS"]))
|
||||
# files_to_compute.extend(expand("data/raw/{pid}/empatica_electrodermal_activity_with_datetime.csv", pid=config["PIDS"]))
|
||||
# files_to_compute.extend(expand("data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
# files_to_compute.extend(expand("data/processed/features/{pid}/empatica_electrodermal_activity.csv", pid=config["PIDS"]))
|
||||
# files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
# files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_electrodermal_activity_joined.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_electrodermal_activity_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_electrodermal_activity.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
for provider in config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"].keys():
|
||||
if config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
|
@ -346,12 +347,12 @@ for provider in config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"].keys():
|
|||
suffixes = get_zip_suffixes(pid)
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_blood_volume_pulse_unzipped_{suffix}.csv", pid=pid, suffix=suffixes))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_blood_volume_pulse_raw_{suffix}.csv", pid=pid, suffix=suffixes))
|
||||
# files_to_compute.extend(expand("data/raw/{pid}/empatica_blood_volume_pulse_joined.csv", pid=config["PIDS"]))
|
||||
# files_to_compute.extend(expand("data/raw/{pid}/empatica_blood_volume_pulse_with_datetime.csv", pid=config["PIDS"]))
|
||||
# files_to_compute.extend(expand("data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
# files_to_compute.extend(expand("data/processed/features/{pid}/empatica_blood_volume_pulse.csv", pid=config["PIDS"]))
|
||||
# files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
# files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_blood_volume_pulse_joined.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_blood_volume_pulse_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_blood_volume_pulse.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
for provider in config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"].keys():
|
||||
if config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
|
@ -359,12 +360,12 @@ for provider in config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"].keys():
|
|||
suffixes = get_zip_suffixes(pid)
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_inter_beat_interval_unzipped_{suffix}.csv", pid=pid, suffix=suffixes))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_inter_beat_interval_raw_{suffix}.csv", pid=pid, suffix=suffixes))
|
||||
# files_to_compute.extend(expand("data/raw/{pid}/empatica_inter_beat_interval_joined.csv", pid=config["PIDS"]))
|
||||
# files_to_compute.extend(expand("data/raw/{pid}/empatica_inter_beat_interval_with_datetime.csv", pid=config["PIDS"]))
|
||||
# files_to_compute.extend(expand("data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
# files_to_compute.extend(expand("data/processed/features/{pid}/empatica_inter_beat_interval.csv", pid=config["PIDS"]))
|
||||
# files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
# files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_inter_beat_interval_joined.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/empatica_inter_beat_interval_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_inter_beat_interval.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
for provider in config["EMPATICA_TAGS"]["PROVIDERS"].keys():
|
||||
if config["EMPATICA_TAGS"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
|
|
|
@ -446,7 +446,7 @@ EMPATICA_TEMPERATURE:
|
|||
PROVIDERS:
|
||||
DBDP:
|
||||
COMPUTE: False
|
||||
FEATURES: []
|
||||
FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"]
|
||||
SRC_FOLDER: "dbdp" # inside src/features/empatica_temperature
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
|
@ -455,7 +455,7 @@ EMPATICA_ELECTRODERMAL_ACTIVITY:
|
|||
PROVIDERS:
|
||||
DBDP:
|
||||
COMPUTE: False
|
||||
FEATURES: []
|
||||
FEATURES: ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", "diffminmodeeda", "entropyeda"]
|
||||
SRC_FOLDER: "dbdp" # inside src/features/empatica_electrodermal_activity
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
|
@ -464,7 +464,7 @@ EMPATICA_BLOOD_VOLUME_PULSE:
|
|||
PROVIDERS:
|
||||
DBDP:
|
||||
COMPUTE: False
|
||||
FEATURES: []
|
||||
FEATURES: ["maxbvp", "minbvp", "avgbvp", "medianbvp", "modebvp", "stdbvp", "diffmaxmodebvp", "diffminmodebvp", "entropybvp"]
|
||||
SRC_FOLDER: "dbdp" # inside src/features/empatica_blood_volume_pulse
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
|
@ -473,7 +473,7 @@ EMPATICA_INTER_BEAT_INTERVAL:
|
|||
PROVIDERS:
|
||||
DBDP:
|
||||
COMPUTE: False
|
||||
FEATURES: []
|
||||
FEATURES: ["maxibi", "minibi", "avgibi", "medianibi", "modeibi", "stdibi", "diffmaxmodeibi", "diffminmodeibi", "entropyibi"]
|
||||
SRC_FOLDER: "dbdp" # inside src/features/empatica_inter_beat_interval
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
|
|
|
@ -14,6 +14,7 @@ local({
|
|||
# signal that we're loading renv during R startup
|
||||
Sys.setenv("RENV_R_INITIALIZING" = "true")
|
||||
on.exit(Sys.unsetenv("RENV_R_INITIALIZING"), add = TRUE)
|
||||
Sys.setenv("TZDIR" = file.path(R.home(), "share", "zoneinfo"))
|
||||
|
||||
# signal that we've consented to use renv
|
||||
options(renv.consent = TRUE)
|
||||
|
|
|
@ -1,21 +1,76 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
from scipy.stats import entropy
|
||||
|
||||
|
||||
def _first_mode(values):
    """Return the smallest most-frequent value of *values*, or NaN if there is none."""
    modes = values.mode()
    # mode() ignores NaN, so a segment holding only NaN yields an empty result;
    # indexing it with [0] would raise instead of producing a missing value.
    return modes.iloc[0] if not modes.empty else float("nan")


def statsFeatures(bvp_data, features, bvp_features):
    """Add the requested per-segment blood volume pulse statistics to *bvp_features*.

    Args:
        bvp_data: DataFrame with "local_segment" and "blood_volume_pulse" columns.
        features: names of the statistics to compute (e.g. "maxbvp", "entropybvp").
        bvp_features: DataFrame that receives one column per requested statistic;
            the assigned values are indexed by "local_segment".

    Returns:
        The *bvp_features* object that was passed in, with the new columns added.
    """
    col_name = "blood_volume_pulse"
    # Group once and reuse the grouping for every statistic.
    grouped = bvp_data[["local_segment", col_name]].groupby(["local_segment"])[col_name]

    # The mode feeds three features; compute it at most once.
    mode_features = {"modebvp", "diffmaxmodebvp", "diffminmodebvp"}
    segment_mode = grouped.agg(_first_mode) if mode_features.intersection(features) else None

    if "sumbvp" in features:
        bvp_features["sumbvp"] = grouped.sum()
    if "maxbvp" in features:
        bvp_features["maxbvp"] = grouped.max()
    if "minbvp" in features:
        bvp_features["minbvp"] = grouped.min()
    if "avgbvp" in features:
        bvp_features["avgbvp"] = grouped.mean()
    if "medianbvp" in features:
        bvp_features["medianbvp"] = grouped.median()
    if "modebvp" in features:
        bvp_features["modebvp"] = segment_mode
    if "stdbvp" in features:
        bvp_features["stdbvp"] = grouped.std()
    if "diffmaxmodebvp" in features:
        bvp_features["diffmaxmodebvp"] = grouped.max() - segment_mode
    if "diffminmodebvp" in features:
        bvp_features["diffminmodebvp"] = segment_mode - grouped.min()
    if "entropybvp" in features:
        # NOTE(review): scipy.stats.entropy normalises the raw values to sum to 1
        # and treats them as probabilities -- confirm this is the intended measure.
        bvp_features["entropybvp"] = grouped.agg(entropy)

    return bvp_features
|
||||
|
||||
|
||||
def extractBVPFeaturesFromIntradayData(bvp_intraday_data, features, time_segment, filter_data_by_segment):
    """Compute blood volume pulse statistics for every instance of a time segment.

    Args:
        bvp_intraday_data: DataFrame of blood volume pulse readings.
        features: list of statistic names to compute.
        time_segment: segment identifier forwarded to *filter_data_by_segment*.
        filter_data_by_segment: callable ``(data, time_segment) -> DataFrame``
            returning the rows to summarise.

    Returns:
        DataFrame with a "local_segment" column plus one column per computed
        feature; an empty frame with those columns when there is nothing to compute.
    """
    no_features = pd.DataFrame(columns=["local_segment"] + features)

    # Without requested features, statsFeatures adds nothing and reset_index()
    # would emit an "index" column instead of "local_segment".
    if bvp_intraday_data.empty or not features:
        return no_features

    segment_data = filter_data_by_segment(bvp_intraday_data, time_segment)
    if segment_data.empty:
        return no_features

    # get stats of bvp
    bvp_intraday_features = statsFeatures(segment_data, features, pd.DataFrame())
    # Turn the "local_segment" index into a regular column.
    bvp_intraday_features.reset_index(inplace=True)
    return bvp_intraday_features
|
||||
|
||||
|
||||
def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute the DBDP blood volume pulse features requested by *provider*.

    Args:
        sensor_data_files: mapping whose "sensor_data" entry is the CSV to read.
        time_segment: segment identifier forwarded to *filter_data_by_segment*.
        provider: provider configuration; its "FEATURES" entry lists the
            requested feature names.
        filter_data_by_segment: callable ``(data, time_segment) -> DataFrame``.
        *args, **kwargs: accepted for interface compatibility and ignored.

    Returns:
        DataFrame with "local_segment" plus one column per computable feature.
    """
    # Read the sensor CSV once; the leftover legacy path re-read it and then
    # returned an always-empty frame ahead of the computed features.
    bvp_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])

    requested_intraday_features = provider["FEATURES"]
    # name of the features this function can compute
    base_intraday_features_names = ["maxbvp", "minbvp", "avgbvp", "medianbvp", "modebvp", "stdbvp", "diffmaxmodebvp",
                                    "diffminmodebvp", "entropybvp"]
    # the subset of requested features this function can compute, kept in the
    # canonical order above so the output columns do not depend on set ordering
    intraday_features_to_compute = [name for name in base_intraday_features_names
                                    if name in requested_intraday_features]

    # extract features from intraday data
    bvp_intraday_features = extractBVPFeaturesFromIntradayData(bvp_intraday_data,
                                                               intraday_features_to_compute, time_segment,
                                                               filter_data_by_segment)

    return bvp_intraday_features
|
|
@ -1,21 +1,76 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
from scipy.stats import entropy
|
||||
|
||||
|
||||
def _first_mode(values):
    """Return the smallest most-frequent value of *values*, or NaN if there is none."""
    modes = values.mode()
    # mode() ignores NaN, so a segment holding only NaN yields an empty result;
    # indexing it with [0] would raise instead of producing a missing value.
    return modes.iloc[0] if not modes.empty else float("nan")


def statsFeatures(eda_data, features, eda_features):
    """Add the requested per-segment electrodermal activity statistics to *eda_features*.

    Args:
        eda_data: DataFrame with "local_segment" and "electrodermal_activity" columns.
        features: names of the statistics to compute (e.g. "maxeda", "entropyeda").
        eda_features: DataFrame that receives one column per requested statistic;
            the assigned values are indexed by "local_segment".

    Returns:
        The *eda_features* object that was passed in, with the new columns added.
    """
    col_name = "electrodermal_activity"
    # Group once and reuse the grouping for every statistic.
    grouped = eda_data[["local_segment", col_name]].groupby(["local_segment"])[col_name]

    # The mode feeds three features; compute it at most once.
    mode_features = {"modeeda", "diffmaxmodeeda", "diffminmodeeda"}
    segment_mode = grouped.agg(_first_mode) if mode_features.intersection(features) else None

    if "sumeda" in features:
        eda_features["sumeda"] = grouped.sum()
    if "maxeda" in features:
        eda_features["maxeda"] = grouped.max()
    if "mineda" in features:
        eda_features["mineda"] = grouped.min()
    if "avgeda" in features:
        eda_features["avgeda"] = grouped.mean()
    if "medianeda" in features:
        eda_features["medianeda"] = grouped.median()
    if "modeeda" in features:
        eda_features["modeeda"] = segment_mode
    if "stdeda" in features:
        eda_features["stdeda"] = grouped.std()
    if "diffmaxmodeeda" in features:
        eda_features["diffmaxmodeeda"] = grouped.max() - segment_mode
    if "diffminmodeeda" in features:
        eda_features["diffminmodeeda"] = segment_mode - grouped.min()
    if "entropyeda" in features:
        # NOTE(review): scipy.stats.entropy normalises the raw values to sum to 1
        # and treats them as probabilities -- confirm this is the intended measure.
        eda_features["entropyeda"] = grouped.agg(entropy)

    return eda_features
|
||||
|
||||
|
||||
def extractEDAFeaturesFromIntradayData(eda_intraday_data, features, time_segment, filter_data_by_segment):
    """Compute electrodermal activity statistics for every instance of a time segment.

    Args:
        eda_intraday_data: DataFrame of electrodermal activity readings.
        features: list of statistic names to compute.
        time_segment: segment identifier forwarded to *filter_data_by_segment*.
        filter_data_by_segment: callable ``(data, time_segment) -> DataFrame``
            returning the rows to summarise.

    Returns:
        DataFrame with a "local_segment" column plus one column per computed
        feature; an empty frame with those columns when there is nothing to compute.
    """
    no_features = pd.DataFrame(columns=["local_segment"] + features)

    # Without requested features, statsFeatures adds nothing and reset_index()
    # would emit an "index" column instead of "local_segment".
    if eda_intraday_data.empty or not features:
        return no_features

    segment_data = filter_data_by_segment(eda_intraday_data, time_segment)
    if segment_data.empty:
        return no_features

    # get stats of eda
    eda_intraday_features = statsFeatures(segment_data, features, pd.DataFrame())
    # Turn the "local_segment" index into a regular column.
    eda_intraday_features.reset_index(inplace=True)
    return eda_intraday_features
|
||||
|
||||
|
||||
def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute the DBDP electrodermal activity features requested by *provider*.

    Args:
        sensor_data_files: mapping whose "sensor_data" entry is the CSV to read.
        time_segment: segment identifier forwarded to *filter_data_by_segment*.
        provider: provider configuration; its "FEATURES" entry lists the
            requested feature names.
        filter_data_by_segment: callable ``(data, time_segment) -> DataFrame``.
        *args, **kwargs: accepted for interface compatibility and ignored.

    Returns:
        DataFrame with "local_segment" plus one column per computable feature.
    """
    # Read the sensor CSV once; the leftover legacy path re-read it and then
    # returned an always-empty frame ahead of the computed features.
    eda_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])

    requested_intraday_features = provider["FEATURES"]
    # name of the features this function can compute
    base_intraday_features_names = ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda",
                                    "diffminmodeeda", "entropyeda"]
    # the subset of requested features this function can compute, kept in the
    # canonical order above so the output columns do not depend on set ordering
    intraday_features_to_compute = [name for name in base_intraday_features_names
                                    if name in requested_intraday_features]

    # extract features from intraday data
    eda_intraday_features = extractEDAFeaturesFromIntradayData(eda_intraday_data,
                                                               intraday_features_to_compute, time_segment,
                                                               filter_data_by_segment)

    return eda_intraday_features
|
|
@ -1,32 +1,47 @@
|
|||
import pandas as pd
|
||||
from scipy.stats import entropy
|
||||
|
||||
def _first_mode(values):
    """Return the smallest most-frequent value of *values*, or NaN if there is none."""
    modes = values.mode()
    # mode() ignores NaN, so a segment holding only NaN yields an empty result;
    # indexing it with [0] would raise instead of producing a missing value.
    return modes.iloc[0] if not modes.empty else float("nan")


def statsFeatures(heartrate_data, features, heartrate_features):
    """Add the requested per-segment heart rate statistics to *heartrate_features*.

    Args:
        heartrate_data: DataFrame with "local_segment" and "heartrate" columns.
        features: names of the statistics to compute (e.g. "maxhr", "entropyhr").
        heartrate_features: DataFrame that receives one column per requested
            statistic; the assigned values are indexed by "local_segment".

    Returns:
        The *heartrate_features* object that was passed in, with the new columns added.
    """
    col_name = "heartrate"
    # Group once and reuse the grouping for every statistic.
    grouped = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[col_name]

    # The mode feeds three features; compute it at most once.
    mode_features = {"modehr", "diffmaxmodehr", "diffminmodehr"}
    segment_mode = grouped.agg(_first_mode) if mode_features.intersection(features) else None

    if "sumhr" in features:
        heartrate_features["sumhr"] = grouped.sum()
    if "maxhr" in features:
        heartrate_features["maxhr"] = grouped.max()
    if "minhr" in features:
        heartrate_features["minhr"] = grouped.min()
    if "avghr" in features:
        heartrate_features["avghr"] = grouped.mean()
    if "medianhr" in features:
        heartrate_features["medianhr"] = grouped.median()
    if "modehr" in features:
        heartrate_features["modehr"] = segment_mode
    if "stdhr" in features:
        heartrate_features["stdhr"] = grouped.std()
    if "diffmaxmodehr" in features:
        heartrate_features["diffmaxmodehr"] = grouped.max() - segment_mode
    if "diffminmodehr" in features:
        heartrate_features["diffminmodehr"] = segment_mode - grouped.min()
    if "entropyhr" in features:
        # NOTE(review): scipy.stats.entropy normalises the raw values to sum to 1
        # and treats them as probabilities -- confirm this is the intended measure.
        heartrate_features["entropyhr"] = grouped.agg(entropy)

    return heartrate_features
|
||||
|
||||
|
||||
def extractHRFeaturesFromIntradayData(heartrate_intraday_data, features, time_segment, filter_data_by_segment):
|
||||
heartrate_intraday_features = pd.DataFrame(columns=["local_segment"] + features)
|
||||
if not heartrate_intraday_data.empty:
|
||||
|
@ -44,16 +59,18 @@ def extractHRFeaturesFromIntradayData(heartrate_intraday_data, features, time_se
|
|||
|
||||
|
||||
def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute the DBDP heart rate features requested by *provider*.

    Args:
        sensor_data_files: mapping whose "sensor_data" entry is the CSV to read.
        time_segment: segment identifier forwarded to *filter_data_by_segment*.
        provider: provider configuration; its "FEATURES" entry lists the
            requested feature names.
        filter_data_by_segment: callable ``(data, time_segment) -> DataFrame``.
        *args, **kwargs: accepted for interface compatibility and ignored.

    Returns:
        The DataFrame produced by extractHRFeaturesFromIntradayData for the
        computable subset of the requested features.
    """
    heartrate_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])

    requested_intraday_features = provider["FEATURES"]
    # name of the features this function can compute
    base_intraday_features_names = ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr",
                                    "diffminmodehr", "entropyhr"]
    # the subset of requested features this function can compute, kept in the
    # canonical order above so the result does not depend on set ordering
    intraday_features_to_compute = [name for name in base_intraday_features_names
                                    if name in requested_intraday_features]

    # extract features from intraday data
    heartrate_intraday_features = extractHRFeaturesFromIntradayData(heartrate_intraday_data,
                                                                    intraday_features_to_compute, time_segment,
                                                                    filter_data_by_segment)

    return heartrate_intraday_features
|
|
@ -1,21 +1,76 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
from scipy.stats import entropy
|
||||
|
||||
|
||||
def _first_mode(values):
    """Return the smallest most-frequent value of *values*, or NaN if there is none."""
    modes = values.mode()
    # mode() ignores NaN, so a segment holding only NaN yields an empty result;
    # indexing it with [0] would raise instead of producing a missing value.
    return modes.iloc[0] if not modes.empty else float("nan")


def statsFeatures(ibi_data, features, ibi_features):
    """Add the requested per-segment inter-beat interval statistics to *ibi_features*.

    Args:
        ibi_data: DataFrame with "local_segment" and "inter_beat_interval" columns.
        features: names of the statistics to compute (e.g. "maxibi", "entropyibi").
        ibi_features: DataFrame that receives one column per requested statistic;
            the assigned values are indexed by "local_segment".

    Returns:
        The *ibi_features* object that was passed in, with the new columns added.
    """
    col_name = "inter_beat_interval"
    # Group once and reuse the grouping for every statistic.
    grouped = ibi_data[["local_segment", col_name]].groupby(["local_segment"])[col_name]

    # The mode feeds three features; compute it at most once.
    mode_features = {"modeibi", "diffmaxmodeibi", "diffminmodeibi"}
    segment_mode = grouped.agg(_first_mode) if mode_features.intersection(features) else None

    if "sumibi" in features:
        ibi_features["sumibi"] = grouped.sum()
    if "maxibi" in features:
        ibi_features["maxibi"] = grouped.max()
    if "minibi" in features:
        ibi_features["minibi"] = grouped.min()
    if "avgibi" in features:
        ibi_features["avgibi"] = grouped.mean()
    if "medianibi" in features:
        ibi_features["medianibi"] = grouped.median()
    if "modeibi" in features:
        ibi_features["modeibi"] = segment_mode
    if "stdibi" in features:
        ibi_features["stdibi"] = grouped.std()
    if "diffmaxmodeibi" in features:
        ibi_features["diffmaxmodeibi"] = grouped.max() - segment_mode
    if "diffminmodeibi" in features:
        ibi_features["diffminmodeibi"] = segment_mode - grouped.min()
    if "entropyibi" in features:
        # NOTE(review): scipy.stats.entropy normalises the raw values to sum to 1
        # and treats them as probabilities -- confirm this is the intended measure.
        ibi_features["entropyibi"] = grouped.agg(entropy)

    return ibi_features
|
||||
|
||||
|
||||
def extractIBIFeaturesFromIntradayData(ibi_intraday_data, features, time_segment, filter_data_by_segment):
    """Compute inter-beat interval statistics for every instance of a time segment.

    Args:
        ibi_intraday_data: DataFrame of inter-beat interval readings.
        features: list of statistic names to compute.
        time_segment: segment identifier forwarded to *filter_data_by_segment*.
        filter_data_by_segment: callable ``(data, time_segment) -> DataFrame``
            returning the rows to summarise.

    Returns:
        DataFrame with a "local_segment" column plus one column per computed
        feature; an empty frame with those columns when there is nothing to compute.
    """
    no_features = pd.DataFrame(columns=["local_segment"] + features)

    # Without requested features, statsFeatures adds nothing and reset_index()
    # would emit an "index" column instead of "local_segment".
    if ibi_intraday_data.empty or not features:
        return no_features

    segment_data = filter_data_by_segment(ibi_intraday_data, time_segment)
    if segment_data.empty:
        return no_features

    # get stats of ibi
    ibi_intraday_features = statsFeatures(segment_data, features, pd.DataFrame())
    # Turn the "local_segment" index into a regular column.
    ibi_intraday_features.reset_index(inplace=True)
    return ibi_intraday_features
|
||||
|
||||
|
||||
def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute the DBDP inter-beat interval features requested by *provider*.

    Args:
        sensor_data_files: mapping whose "sensor_data" entry is the CSV to read.
        time_segment: segment identifier forwarded to *filter_data_by_segment*.
        provider: provider configuration; its "FEATURES" entry lists the
            requested feature names.
        filter_data_by_segment: callable ``(data, time_segment) -> DataFrame``.
        *args, **kwargs: accepted for interface compatibility and ignored.

    Returns:
        DataFrame with "local_segment" plus one column per computable feature.
    """
    # Read the sensor CSV once; the leftover legacy path re-read it and then
    # returned an always-empty frame ahead of the computed features.
    ibi_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])

    requested_intraday_features = provider["FEATURES"]
    # name of the features this function can compute
    base_intraday_features_names = ["maxibi", "minibi", "avgibi", "medianibi", "modeibi", "stdibi", "diffmaxmodeibi",
                                    "diffminmodeibi", "entropyibi"]
    # the subset of requested features this function can compute, kept in the
    # canonical order above so the output columns do not depend on set ordering
    intraday_features_to_compute = [name for name in base_intraday_features_names
                                    if name in requested_intraday_features]

    # extract features from intraday data
    ibi_intraday_features = extractIBIFeaturesFromIntradayData(ibi_intraday_data,
                                                               intraday_features_to_compute, time_segment,
                                                               filter_data_by_segment)

    return ibi_intraday_features
|
|
@ -1,21 +1,76 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
from scipy.stats import entropy
|
||||
|
||||
|
||||
def _first_mode(values):
    """Return the smallest most-frequent value of *values*, or NaN if there is none."""
    modes = values.mode()
    # mode() ignores NaN, so a segment holding only NaN yields an empty result;
    # indexing it with [0] would raise instead of producing a missing value.
    return modes.iloc[0] if not modes.empty else float("nan")


def statsFeatures(temperature_data, features, temperature_features):
    """Add the requested per-segment temperature statistics to *temperature_features*.

    Args:
        temperature_data: DataFrame with "local_segment" and "temperature" columns.
        features: names of the statistics to compute (e.g. "maxtemp", "entropytemp").
        temperature_features: DataFrame that receives one column per requested
            statistic; the assigned values are indexed by "local_segment".

    Returns:
        The *temperature_features* object that was passed in, with the new columns added.
    """
    col_name = "temperature"
    # Group once and reuse the grouping for every statistic.
    grouped = temperature_data[["local_segment", col_name]].groupby(["local_segment"])[col_name]

    # The mode feeds three features; compute it at most once.
    mode_features = {"modetemp", "diffmaxmodetemp", "diffminmodetemp"}
    segment_mode = grouped.agg(_first_mode) if mode_features.intersection(features) else None

    if "sumtemp" in features:
        temperature_features["sumtemp"] = grouped.sum()
    if "maxtemp" in features:
        temperature_features["maxtemp"] = grouped.max()
    if "mintemp" in features:
        temperature_features["mintemp"] = grouped.min()
    if "avgtemp" in features:
        temperature_features["avgtemp"] = grouped.mean()
    if "mediantemp" in features:
        temperature_features["mediantemp"] = grouped.median()
    if "modetemp" in features:
        temperature_features["modetemp"] = segment_mode
    if "stdtemp" in features:
        temperature_features["stdtemp"] = grouped.std()
    if "diffmaxmodetemp" in features:
        temperature_features["diffmaxmodetemp"] = grouped.max() - segment_mode
    if "diffminmodetemp" in features:
        temperature_features["diffminmodetemp"] = segment_mode - grouped.min()
    if "entropytemp" in features:
        # NOTE(review): scipy.stats.entropy normalises the raw values to sum to 1
        # and treats them as probabilities -- confirm this is the intended measure.
        temperature_features["entropytemp"] = grouped.agg(entropy)

    return temperature_features
|
||||
|
||||
|
||||
def extractTempFeaturesFromIntradayData(temperature_intraday_data, features, time_segment, filter_data_by_segment):
    """Compute temperature statistics for every instance of a time segment.

    Args:
        temperature_intraday_data: DataFrame of temperature readings.
        features: list of statistic names to compute.
        time_segment: segment identifier forwarded to *filter_data_by_segment*.
        filter_data_by_segment: callable ``(data, time_segment) -> DataFrame``
            returning the rows to summarise.

    Returns:
        DataFrame with a "local_segment" column plus one column per computed
        feature; an empty frame with those columns when there is nothing to compute.
    """
    no_features = pd.DataFrame(columns=["local_segment"] + features)

    # Without requested features, statsFeatures adds nothing and reset_index()
    # would emit an "index" column instead of "local_segment".
    if temperature_intraday_data.empty or not features:
        return no_features

    segment_data = filter_data_by_segment(temperature_intraday_data, time_segment)
    if segment_data.empty:
        return no_features

    # get stats of temperature
    temperature_intraday_features = statsFeatures(segment_data, features, pd.DataFrame())
    # Turn the "local_segment" index into a regular column.
    temperature_intraday_features.reset_index(inplace=True)
    return temperature_intraday_features
|
||||
|
||||
|
||||
def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute the DBDP temperature features requested by *provider*.

    Args:
        sensor_data_files: mapping whose "sensor_data" entry is the CSV to read.
        time_segment: segment identifier forwarded to *filter_data_by_segment*.
        provider: provider configuration; its "FEATURES" entry lists the
            requested feature names.
        filter_data_by_segment: callable ``(data, time_segment) -> DataFrame``.
        *args, **kwargs: accepted for interface compatibility and ignored.

    Returns:
        DataFrame with "local_segment" plus one column per computable feature.
    """
    # Read the sensor CSV once; the leftover legacy path re-read it and then
    # returned an always-empty frame ahead of the computed features.
    temperature_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])

    requested_intraday_features = provider["FEATURES"]
    # name of the features this function can compute
    base_intraday_features_names = ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp",
                                    "diffmaxmodetemp", "diffminmodetemp", "entropytemp"]
    # the subset of requested features this function can compute, kept in the
    # canonical order above so the output columns do not depend on set ordering
    intraday_features_to_compute = [name for name in base_intraday_features_names
                                    if name in requested_intraday_features]

    # extract features from intraday data
    temperature_intraday_features = extractTempFeaturesFromIntradayData(temperature_intraday_data,
                                                                        intraday_features_to_compute, time_segment,
                                                                        filter_data_by_segment)

    return temperature_intraday_features
|
Loading…
Reference in New Issue