From 4469cfd6bb05d5e7a13b296dbedfe39038b831d9 Mon Sep 17 00:00:00 2001 From: Joe Kim Date: Thu, 11 Feb 2021 20:56:27 -0500 Subject: [PATCH] add stats features for bvp, eda, ibi, temp --- Snakefile | 85 ++++++++++--------- config.yaml | 8 +- renv/activate.R | 1 + .../empatica_blood_volume_pulse/dbdp/main.py | 81 +++++++++++++++--- .../dbdp/main.py | 81 +++++++++++++++--- src/features/empatica_heartrate/dbdp/main.py | 53 ++++++++---- .../empatica_inter_beat_interval/dbdp/main.py | 81 +++++++++++++++--- .../empatica_temperature/dbdp/main.py | 81 +++++++++++++++--- 8 files changed, 355 insertions(+), 116 deletions(-) diff --git a/Snakefile b/Snakefile index 402ed2e0..4c1fae74 100644 --- a/Snakefile +++ b/Snakefile @@ -294,12 +294,12 @@ for provider in config["EMPATICA_ACCELEROMETER"]["PROVIDERS"].keys(): suffixes = get_zip_suffixes(pid) files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_unzipped_{suffix}.csv", pid=pid, suffix=suffixes)) files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_raw_{suffix}.csv", pid=pid, suffix=suffixes)) - files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_joined.csv", pid=config["PIDS"])) - files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_with_datetime.csv", pid=config["PIDS"])) - files_to_compute.extend(expand("data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_ACCELEROMETER"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) - files_to_compute.extend(expand("data/processed/features/{pid}/empatica_accelerometer.csv", pid=config["PIDS"])) - files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) - files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") + files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_joined.csv", 
pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_with_datetime.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_ACCELEROMETER"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) + files_to_compute.extend(expand("data/processed/features/{pid}/empatica_accelerometer.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) + files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") for provider in config["EMPATICA_HEARTRATE"]["PROVIDERS"].keys(): if config["EMPATICA_HEARTRATE"]["PROVIDERS"][provider]["COMPUTE"]: @@ -307,12 +307,13 @@ for provider in config["EMPATICA_HEARTRATE"]["PROVIDERS"].keys(): suffixes = get_zip_suffixes(pid) files_to_compute.extend(expand("data/raw/{pid}/empatica_heartrate_unzipped_{suffix}.csv", pid=pid, suffix=suffixes)) files_to_compute.extend(expand("data/raw/{pid}/empatica_heartrate_raw_{suffix}.csv", pid=pid, suffix=suffixes)) - files_to_compute.extend(expand("data/raw/{pid}/empatica_heartrate_joined.csv", pid=config["PIDS"])) - files_to_compute.extend(expand("data/raw/{pid}/empatica_heartrate_with_datetime.csv", pid=config["PIDS"])) - files_to_compute.extend(expand("data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_HEARTRATE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) - files_to_compute.extend(expand("data/processed/features/{pid}/empatica_heartrate.csv", pid=config["PIDS"])) - files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) - 
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") + files_to_compute.extend(expand("data/raw/{pid}/empatica_heartrate_joined.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/empatica_heartrate_with_datetime.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_HEARTRATE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) + files_to_compute.extend(expand("data/processed/features/{pid}/empatica_heartrate.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) + files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") + for provider in config["EMPATICA_TEMPERATURE"]["PROVIDERS"].keys(): if config["EMPATICA_TEMPERATURE"]["PROVIDERS"][provider]["COMPUTE"]: @@ -320,12 +321,12 @@ for provider in config["EMPATICA_TEMPERATURE"]["PROVIDERS"].keys(): suffixes = get_zip_suffixes(pid) files_to_compute.extend(expand("data/raw/{pid}/empatica_temperature_unzipped_{suffix}.csv", pid=pid, suffix=suffixes)) files_to_compute.extend(expand("data/raw/{pid}/empatica_temperature_raw_{suffix}.csv", pid=pid, suffix=suffixes)) -# files_to_compute.extend(expand("data/raw/{pid}/empatica_temperature_joined.csv", pid=config["PIDS"])) -# files_to_compute.extend(expand("data/raw/{pid}/empatica_temperature_with_datetime.csv", pid=config["PIDS"])) -# files_to_compute.extend(expand("data/interim/{pid}/empatica_temperature_features/empatica_temperature_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_TEMPERATURE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) -# files_to_compute.extend(expand("data/processed/features/{pid}/empatica_temperature.csv", pid=config["PIDS"])) -# 
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) -# files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") + files_to_compute.extend(expand("data/raw/{pid}/empatica_temperature_joined.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/empatica_temperature_with_datetime.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/interim/{pid}/empatica_temperature_features/empatica_temperature_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_TEMPERATURE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) + files_to_compute.extend(expand("data/processed/features/{pid}/empatica_temperature.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) + files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") for provider in config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"].keys(): if config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"][provider]["COMPUTE"]: @@ -333,12 +334,12 @@ for provider in config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"].keys(): suffixes = get_zip_suffixes(pid) files_to_compute.extend(expand("data/raw/{pid}/empatica_electrodermal_activity_unzipped_{suffix}.csv", pid=pid, suffix=suffixes)) files_to_compute.extend(expand("data/raw/{pid}/empatica_electrodermal_activity_raw_{suffix}.csv", pid=pid, suffix=suffixes)) -# files_to_compute.extend(expand("data/raw/{pid}/empatica_electrodermal_activity_joined.csv", pid=config["PIDS"])) -# files_to_compute.extend(expand("data/raw/{pid}/empatica_electrodermal_activity_with_datetime.csv", pid=config["PIDS"])) -# files_to_compute.extend(expand("data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_{language}_{provider_key}.csv", pid=config["PIDS"], 
language=config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) -# files_to_compute.extend(expand("data/processed/features/{pid}/empatica_electrodermal_activity.csv", pid=config["PIDS"])) -# files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) -# files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") + files_to_compute.extend(expand("data/raw/{pid}/empatica_electrodermal_activity_joined.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/empatica_electrodermal_activity_with_datetime.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) + files_to_compute.extend(expand("data/processed/features/{pid}/empatica_electrodermal_activity.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) + files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") for provider in config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"].keys(): if config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"][provider]["COMPUTE"]: @@ -346,12 +347,12 @@ for provider in config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"].keys(): suffixes = get_zip_suffixes(pid) files_to_compute.extend(expand("data/raw/{pid}/empatica_blood_volume_pulse_unzipped_{suffix}.csv", pid=pid, suffix=suffixes)) files_to_compute.extend(expand("data/raw/{pid}/empatica_blood_volume_pulse_raw_{suffix}.csv", pid=pid, suffix=suffixes)) -# files_to_compute.extend(expand("data/raw/{pid}/empatica_blood_volume_pulse_joined.csv", pid=config["PIDS"])) -# 
files_to_compute.extend(expand("data/raw/{pid}/empatica_blood_volume_pulse_with_datetime.csv", pid=config["PIDS"])) -# files_to_compute.extend(expand("data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) -# files_to_compute.extend(expand("data/processed/features/{pid}/empatica_blood_volume_pulse.csv", pid=config["PIDS"])) -# files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) -# files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") + files_to_compute.extend(expand("data/raw/{pid}/empatica_blood_volume_pulse_joined.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/empatica_blood_volume_pulse_with_datetime.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) + files_to_compute.extend(expand("data/processed/features/{pid}/empatica_blood_volume_pulse.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) + files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") for provider in config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"].keys(): if config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"][provider]["COMPUTE"]: @@ -359,12 +360,12 @@ for provider in config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"].keys(): suffixes = get_zip_suffixes(pid) files_to_compute.extend(expand("data/raw/{pid}/empatica_inter_beat_interval_unzipped_{suffix}.csv", pid=pid, 
suffix=suffixes)) files_to_compute.extend(expand("data/raw/{pid}/empatica_inter_beat_interval_raw_{suffix}.csv", pid=pid, suffix=suffixes)) -# files_to_compute.extend(expand("data/raw/{pid}/empatica_inter_beat_interval_joined.csv", pid=config["PIDS"])) -# files_to_compute.extend(expand("data/raw/{pid}/empatica_inter_beat_interval_with_datetime.csv", pid=config["PIDS"])) -# files_to_compute.extend(expand("data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) -# files_to_compute.extend(expand("data/processed/features/{pid}/empatica_inter_beat_interval.csv", pid=config["PIDS"])) -# files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) -# files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") + files_to_compute.extend(expand("data/raw/{pid}/empatica_inter_beat_interval_joined.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/empatica_inter_beat_interval_with_datetime.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) + files_to_compute.extend(expand("data/processed/features/{pid}/empatica_inter_beat_interval.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) + files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") for provider in config["EMPATICA_TAGS"]["PROVIDERS"].keys(): if config["EMPATICA_TAGS"]["PROVIDERS"][provider]["COMPUTE"]: @@ -372,12 +373,12 
@@ for provider in config["EMPATICA_TAGS"]["PROVIDERS"].keys(): suffixes = get_zip_suffixes(pid) files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_unzipped_{suffix}.csv", pid=pid, suffix=suffixes)) files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_raw_{suffix}.csv", pid=pid, suffix=suffixes)) - files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_joined.csv", pid=config["PIDS"])) - files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_with_datetime.csv", pid=config["PIDS"])) - files_to_compute.extend(expand("data/interim/{pid}/empatica_tags_features/empatica_tags_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_TAGS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) - files_to_compute.extend(expand("data/processed/features/{pid}/empatica_tags.csv", pid=config["PIDS"])) - files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) - files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") + files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_joined.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_with_datetime.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/interim/{pid}/empatica_tags_features/empatica_tags_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_TAGS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) + files_to_compute.extend(expand("data/processed/features/{pid}/empatica_tags.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) + files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") # Visualization for Data Exploration if config["HISTOGRAM_PHONE_DATA_YIELD"]["PLOT"]: diff --git a/config.yaml b/config.yaml index 18f6c6ef..ed40edd9 
100644 --- a/config.yaml +++ b/config.yaml @@ -446,7 +446,7 @@ EMPATICA_TEMPERATURE: PROVIDERS: DBDP: COMPUTE: False - FEATURES: [] + FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"] SRC_FOLDER: "dbdp" # inside src/features/empatica_heartrate SRC_LANGUAGE: "python" @@ -455,7 +455,7 @@ EMPATICA_ELECTRODERMAL_ACTIVITY: PROVIDERS: DBDP: COMPUTE: False - FEATURES: [] + FEATURES: ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", "diffminmodeeda", "entropyeda"] SRC_FOLDER: "dbdp" # inside src/features/empatica_heartrate SRC_LANGUAGE: "python" @@ -464,7 +464,7 @@ EMPATICA_BLOOD_VOLUME_PULSE: PROVIDERS: DBDP: COMPUTE: False - FEATURES: [] + FEATURES: ["maxbvp", "minbvp", "avgbvp", "medianbvp", "modebvp", "stdbvp", "diffmaxmodebvp", "diffminmodebvp", "entropybvp"] SRC_FOLDER: "dbdp" # inside src/features/empatica_heartrate SRC_LANGUAGE: "python" @@ -473,7 +473,7 @@ EMPATICA_INTER_BEAT_INTERVAL: PROVIDERS: DBDP: COMPUTE: False - FEATURES: [] + FEATURES: ["maxibi", "minibi", "avgibi", "medianibi", "modeibi", "stdibi", "diffmaxmodeibi", "diffminmodeibi", "entropyibi"] SRC_FOLDER: "dbdp" # inside src/features/empatica_heartrate SRC_LANGUAGE: "python" diff --git a/renv/activate.R b/renv/activate.R index 6841a967..5fbb4d27 100644 --- a/renv/activate.R +++ b/renv/activate.R @@ -14,6 +14,7 @@ local({ # signal that we're loading renv during R startup Sys.setenv("RENV_R_INITIALIZING" = "true") on.exit(Sys.unsetenv("RENV_R_INITIALIZING"), add = TRUE) + Sys.setenv("TZDIR" = file.path(R.home(), "share", "zoneinfo")) # signal that we've consented to use renv options(renv.consent = TRUE) diff --git a/src/features/empatica_blood_volume_pulse/dbdp/main.py b/src/features/empatica_blood_volume_pulse/dbdp/main.py index 82da2f3a..788b84e0 100644 --- a/src/features/empatica_blood_volume_pulse/dbdp/main.py +++ b/src/features/empatica_blood_volume_pulse/dbdp/main.py @@ -1,21 +1,76 
@@ import pandas as pd -import numpy as np +from scipy.stats import entropy + + +def statsFeatures(bvp_data, features, bvp_features): + col_name = "blood_volume_pulse" + if "sumbvp" in features: + bvp_features["sumbvp"] = bvp_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].sum() + if "maxbvp" in features: + bvp_features["maxbvp"] = bvp_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].max() + if "minbvp" in features: + bvp_features["minbvp"] = bvp_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].min() + if "avgbvp" in features: + bvp_features["avgbvp"] = bvp_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].mean() + if "medianbvp" in features: + bvp_features["medianbvp"] = bvp_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].median() + if "modebvp" in features: + bvp_features["modebvp"] = bvp_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].agg(lambda x: pd.Series.mode(x)[0]) + if "stdbvp" in features: + bvp_features["stdbvp"] = bvp_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].std() + if "diffmaxmodebvp" in features: + bvp_features["diffmaxmodebvp"] = bvp_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].max() - \ + bvp_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].agg(lambda x: pd.Series.mode(x)[0]) + if "diffminmodebvp" in features: + bvp_features["diffminmodebvp"] = bvp_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].agg(lambda x: pd.Series.mode(x)[0]) - \ + bvp_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].min() + if "entropybvp" in features: + bvp_features["entropybvp"] = bvp_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].agg(entropy) + + return bvp_features + + +def extractBVPFeaturesFromIntradayData(bvp_intraday_data, features, time_segment, 
filter_data_by_segment): + bvp_intraday_features = pd.DataFrame(columns=["local_segment"] + features) + if not bvp_intraday_data.empty: + bvp_intraday_data = filter_data_by_segment(bvp_intraday_data, time_segment) + + if not bvp_intraday_data.empty: + bvp_intraday_features = pd.DataFrame() + + # get stats of bvp + bvp_intraday_features = statsFeatures(bvp_intraday_data, features, bvp_intraday_features) + + bvp_intraday_features.reset_index(inplace=True) + + return bvp_intraday_features + def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs): + bvp_intraday_data = pd.read_csv(sensor_data_files["sensor_data"]) - sensor_data = pd.read_csv(sensor_data_files["sensor_data"]) - requested_features = provider["FEATURES"] + requested_intraday_features = provider["FEATURES"] # name of the features this function can compute - base_features_names = [] # ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"] + base_intraday_features_names = ["maxbvp", "minbvp", "avgbvp", "medianbvp", "modebvp", "stdbvp", "diffmaxmodebvp", + "diffminmodebvp", "entropybvp"] # the subset of requested features this function can compute - features_to_compute = list(set(requested_features) & set(base_features_names)) + intraday_features_to_compute = list(set(requested_intraday_features) & set(base_intraday_features_names)) - features = pd.DataFrame(columns=["local_segment"] + features_to_compute) - if not sensor_data.empty: - sensor_data = filter_data_by_segment(sensor_data, time_segment) - - if not sensor_data.empty: - features = pd.DataFrame() - + # extract features from intraday data + bvp_intraday_features = extractBVPFeaturesFromIntradayData(bvp_intraday_data, + intraday_features_to_compute, time_segment, + filter_data_by_segment) - return features \ No newline at end of file + return bvp_intraday_features \ No newline at end of file diff --git a/src/features/empatica_electrodermal_activity/dbdp/main.py 
b/src/features/empatica_electrodermal_activity/dbdp/main.py index 82da2f3a..eaf37c57 100644 --- a/src/features/empatica_electrodermal_activity/dbdp/main.py +++ b/src/features/empatica_electrodermal_activity/dbdp/main.py @@ -1,21 +1,76 @@ import pandas as pd -import numpy as np +from scipy.stats import entropy + + +def statsFeatures(eda_data, features, eda_features): + col_name = "electrodermal_activity" + if "sumeda" in features: + eda_features["sumeda"] = eda_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].sum() + if "maxeda" in features: + eda_features["maxeda"] = eda_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].max() + if "mineda" in features: + eda_features["mineda"] = eda_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].min() + if "avgeda" in features: + eda_features["avgeda"] = eda_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].mean() + if "medianeda" in features: + eda_features["medianeda"] = eda_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].median() + if "modeeda" in features: + eda_features["modeeda"] = eda_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].agg(lambda x: pd.Series.mode(x)[0]) + if "stdeda" in features: + eda_features["stdeda"] = eda_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].std() + if "diffmaxmodeeda" in features: + eda_features["diffmaxmodeeda"] = eda_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].max() - \ + eda_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].agg(lambda x: pd.Series.mode(x)[0]) + if "diffminmodeeda" in features: + eda_features["diffminmodeeda"] = eda_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].agg(lambda x: pd.Series.mode(x)[0]) - \ + eda_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].min() + if "entropyeda" in features: + 
eda_features["entropyeda"] = eda_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].agg(entropy) + + return eda_features + + +def extractEDAFeaturesFromIntradayData(eda_intraday_data, features, time_segment, filter_data_by_segment): + eda_intraday_features = pd.DataFrame(columns=["local_segment"] + features) + if not eda_intraday_data.empty: + eda_intraday_data = filter_data_by_segment(eda_intraday_data, time_segment) + + if not eda_intraday_data.empty: + eda_intraday_features = pd.DataFrame() + + # get stats of eda + eda_intraday_features = statsFeatures(eda_intraday_data, features, eda_intraday_features) + + eda_intraday_features.reset_index(inplace=True) + + return eda_intraday_features + def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs): + eda_intraday_data = pd.read_csv(sensor_data_files["sensor_data"]) - sensor_data = pd.read_csv(sensor_data_files["sensor_data"]) - requested_features = provider["FEATURES"] + requested_intraday_features = provider["FEATURES"] # name of the features this function can compute - base_features_names = [] # ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"] + base_intraday_features_names = ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", + "diffminmodeeda", "entropyeda"] # the subset of requested features this function can compute - features_to_compute = list(set(requested_features) & set(base_features_names)) + intraday_features_to_compute = list(set(requested_intraday_features) & set(base_intraday_features_names)) - features = pd.DataFrame(columns=["local_segment"] + features_to_compute) - if not sensor_data.empty: - sensor_data = filter_data_by_segment(sensor_data, time_segment) - - if not sensor_data.empty: - features = pd.DataFrame() - + # extract features from intraday data + eda_intraday_features = extractEDAFeaturesFromIntradayData(eda_intraday_data, + intraday_features_to_compute, 
time_segment, + filter_data_by_segment) - return features \ No newline at end of file + return eda_intraday_features \ No newline at end of file diff --git a/src/features/empatica_heartrate/dbdp/main.py b/src/features/empatica_heartrate/dbdp/main.py index 4f6cc2e2..00dd3936 100644 --- a/src/features/empatica_heartrate/dbdp/main.py +++ b/src/features/empatica_heartrate/dbdp/main.py @@ -1,32 +1,47 @@ import pandas as pd from scipy.stats import entropy -def statsFeatures(heartrate_data, features, heartrate_features): +def statsFeatures(heartrate_data, features, heartrate_features): col_name = "heartrate" if "sumhr" in features: - heartrate_features["sumhr"] = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[col_name].sum() + heartrate_features["sumhr"] = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].sum() if "maxhr" in features: - heartrate_features["maxhr"] = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[col_name].max() + heartrate_features["maxhr"] = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].max() if "minhr" in features: - heartrate_features["minhr"] = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[col_name].min() + heartrate_features["minhr"] = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].min() if "avghr" in features: - heartrate_features["avghr"] = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[col_name].mean() + heartrate_features["avghr"] = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].mean() if "medianhr" in features: - heartrate_features["medianhr"] = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[col_name].median() + heartrate_features["medianhr"] = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].median() if "modehr" in features: - 
heartrate_features["modehr"] = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[col_name].agg(lambda x: pd.Series.mode(x)[0]) + heartrate_features["modehr"] = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].agg(lambda x: pd.Series.mode(x)[0]) if "stdhr" in features: - heartrate_features["stdhr"] = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[col_name].std() + heartrate_features["stdhr"] = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].std() if "diffmaxmodehr" in features: - heartrate_features["diffmaxmodehr"] = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[col_name].max() - heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[col_name].agg(lambda x: pd.Series.mode(x)[0]) + heartrate_features["diffmaxmodehr"] = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].max() - \ + heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].agg(lambda x: pd.Series.mode(x)[0]) if "diffminmodehr" in features: - heartrate_features["diffminmodehr"] = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[col_name].agg(lambda x: pd.Series.mode(x)[0]) - heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[col_name].min() + heartrate_features["diffminmodehr"] = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].agg(lambda x: pd.Series.mode(x)[0]) - \ + heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].min() if "entropyhr" in features: - heartrate_features["entropyhr"] = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[col_name].agg(entropy) + heartrate_features["entropyhr"] = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[ + col_name].agg(entropy) return heartrate_features + def 
extractHRFeaturesFromIntradayData(heartrate_intraday_data, features, time_segment, filter_data_by_segment): heartrate_intraday_features = pd.DataFrame(columns=["local_segment"] + features) if not heartrate_intraday_data.empty: @@ -34,7 +49,7 @@ def extractHRFeaturesFromIntradayData(heartrate_intraday_data, features, time_se if not heartrate_intraday_data.empty: heartrate_intraday_features = pd.DataFrame() - + # get stats of heartrate heartrate_intraday_features = statsFeatures(heartrate_intraday_data, features, heartrate_intraday_features) @@ -44,16 +59,18 @@ def extractHRFeaturesFromIntradayData(heartrate_intraday_data, features, time_se def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs): - heartrate_intraday_data = pd.read_csv(sensor_data_files["sensor_data"]) requested_intraday_features = provider["FEATURES"] # name of the features this function can compute - base_intraday_features_names = ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr"] + base_intraday_features_names = ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", + "diffminmodehr", "entropyhr"] # the subset of requested features this function can compute intraday_features_to_compute = list(set(requested_intraday_features) & set(base_intraday_features_names)) - + # extract features from intraday data - heartrate_intraday_features = extractHRFeaturesFromIntradayData(heartrate_intraday_data, intraday_features_to_compute, time_segment, filter_data_by_segment) - - return heartrate_intraday_features + heartrate_intraday_features = extractHRFeaturesFromIntradayData(heartrate_intraday_data, + intraday_features_to_compute, time_segment, + filter_data_by_segment) + + return heartrate_intraday_features \ No newline at end of file diff --git a/src/features/empatica_inter_beat_interval/dbdp/main.py b/src/features/empatica_inter_beat_interval/dbdp/main.py index 82da2f3a..8faae507 100644 
# File: src/features/empatica_inter_beat_interval/dbdp/main.py
"""Per-segment summary statistics of Empatica inter-beat-interval data."""
import pandas as pd
from scipy.stats import entropy


def _first_mode(values):
    """Return the first mode of ``values``, or NaN when there is none."""
    modes = values.mode()
    # ``Series.mode`` ignores NaN by default, so an all-NaN segment yields an
    # empty result; indexing it with ``[0]`` would raise.
    return modes.iloc[0] if not modes.empty else float("nan")


def statsFeatures(ibi_data, features, ibi_features):
    """Add the requested per-segment statistics as columns of ``ibi_features``.

    Args:
        ibi_data: DataFrame with a ``local_segment`` column and an
            ``inter_beat_interval`` column.
        features: Names of the features to compute; names this function does
            not know are ignored.
        ibi_features: DataFrame that receives one column per computed
            feature. It is modified in place.

    Returns:
        ``ibi_features``, with the feature columns in a fixed order (sum, max,
        min, avg, median, mode, std, diffmaxmode, diffminmode, entropy)
        regardless of the order of ``features``.
    """
    col_name = "inter_beat_interval"
    # Group once and reuse the grouping for every statistic.
    grouped = ibi_data.groupby("local_segment")[col_name]

    # The mode feeds three features; compute it at most once.
    mode_dependent = ("modeibi", "diffmaxmodeibi", "diffminmodeibi")
    mode = grouped.agg(_first_mode) if any(name in features for name in mode_dependent) else None

    # Insertion order of this mapping defines the output column order.
    calculators = {
        "sumibi": grouped.sum,
        "maxibi": grouped.max,
        "minibi": grouped.min,
        "avgibi": grouped.mean,
        "medianibi": grouped.median,
        "modeibi": lambda: mode,
        "stdibi": grouped.std,
        "diffmaxmodeibi": lambda: grouped.max() - mode,
        "diffminmodeibi": lambda: mode - grouped.min(),
        # NOTE(review): scipy.stats.entropy is applied to the raw values of a
        # segment, i.e. they are used as unnormalised weights - confirm this
        # is the intended definition of the feature.
        "entropyibi": lambda: grouped.agg(entropy),
    }
    for name, compute in calculators.items():
        if name in features:
            ibi_features[name] = compute()

    return ibi_features


def extractIBIFeaturesFromIntradayData(ibi_intraday_data, features, time_segment, filter_data_by_segment):
    """Compute ``features`` for every segment of ``time_segment``.

    Args:
        ibi_intraday_data: Raw inter-beat-interval DataFrame.
        features: List of feature names to compute.
        time_segment: Passed through to ``filter_data_by_segment``.
        filter_data_by_segment: Callable invoked as
            ``filter_data_by_segment(data, time_segment)``; its result is
            grouped by ``local_segment``, so it must provide that column.

    Returns:
        DataFrame with a ``local_segment`` column followed by one column per
        feature. It has no rows when there is no data (before or after
        filtering) or when ``features`` is empty.
    """
    no_features = pd.DataFrame(columns=["local_segment"] + features)
    if ibi_intraday_data.empty:
        return no_features

    ibi_intraday_data = filter_data_by_segment(ibi_intraday_data, time_segment)
    # With nothing to compute, resetting the index of a column-less frame would
    # produce an "index" column instead of "local_segment"; keep the schema.
    if ibi_intraday_data.empty or not features:
        return no_features

    # get stats of ibi
    ibi_intraday_features = statsFeatures(ibi_intraday_data, features, pd.DataFrame())
    # The first assigned column brings the "local_segment" index with it;
    # turn that index into a regular column.
    ibi_intraday_features.reset_index(inplace=True)
    return ibi_intraday_features


def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Entry point of the provider: read the sensor CSV and compute features.

    Args:
        sensor_data_files: Mapping whose ``"sensor_data"`` entry is handed to
            ``pandas.read_csv``.
        time_segment: Passed through to ``filter_data_by_segment``.
        provider: Mapping whose ``"FEATURES"`` entry lists the requested
            feature names.
        filter_data_by_segment: Callable used to restrict the data to
            ``time_segment``.
        *args, **kwargs: Accepted and ignored.

    Returns:
        DataFrame produced by ``extractIBIFeaturesFromIntradayData``.
    """
    ibi_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])

    requested_intraday_features = set(provider["FEATURES"])
    # name of the features this function can compute ("sumibi" is implemented
    # by statsFeatures and therefore listed as well)
    base_intraday_features_names = ["sumibi", "maxibi", "minibi", "avgibi", "medianibi", "modeibi", "stdibi",
                                    "diffmaxmodeibi", "diffminmodeibi", "entropyibi"]
    # the subset of requested features this function can compute; iterating the
    # base list (not a set intersection) keeps the column order deterministic
    intraday_features_to_compute = [name for name in base_intraday_features_names
                                    if name in requested_intraday_features]

    # extract features from intraday data
    return extractIBIFeaturesFromIntradayData(ibi_intraday_data, intraday_features_to_compute,
                                              time_segment, filter_data_by_segment)
# File: src/features/empatica_temperature/dbdp/main.py
"""Per-segment summary statistics of Empatica temperature data."""
import pandas as pd
from scipy.stats import entropy


def _first_mode(values):
    """Return the first mode of ``values``, or NaN when there is none."""
    modes = values.mode()
    # ``Series.mode`` ignores NaN by default, so an all-NaN segment yields an
    # empty result; indexing it with ``[0]`` would raise.
    return modes.iloc[0] if not modes.empty else float("nan")


def statsFeatures(temperature_data, features, temperature_features):
    """Add the requested per-segment statistics as columns of ``temperature_features``.

    Args:
        temperature_data: DataFrame with a ``local_segment`` column and a
            ``temperature`` column.
        features: Names of the features to compute; names this function does
            not know are ignored.
        temperature_features: DataFrame that receives one column per computed
            feature. It is modified in place.

    Returns:
        ``temperature_features``, with the feature columns in a fixed order
        (sum, max, min, avg, median, mode, std, diffmaxmode, diffminmode,
        entropy) regardless of the order of ``features``.
    """
    col_name = "temperature"
    # Group once and reuse the grouping for every statistic.
    grouped = temperature_data.groupby("local_segment")[col_name]

    # The mode feeds three features; compute it at most once.
    mode_dependent = ("modetemp", "diffmaxmodetemp", "diffminmodetemp")
    mode = grouped.agg(_first_mode) if any(name in features for name in mode_dependent) else None

    # Insertion order of this mapping defines the output column order.
    calculators = {
        "sumtemp": grouped.sum,
        "maxtemp": grouped.max,
        "mintemp": grouped.min,
        "avgtemp": grouped.mean,
        "mediantemp": grouped.median,
        "modetemp": lambda: mode,
        "stdtemp": grouped.std,
        "diffmaxmodetemp": lambda: grouped.max() - mode,
        "diffminmodetemp": lambda: mode - grouped.min(),
        # NOTE(review): scipy.stats.entropy is applied to the raw values of a
        # segment, i.e. they are used as unnormalised weights - confirm this
        # is the intended definition of the feature.
        "entropytemp": lambda: grouped.agg(entropy),
    }
    for name, compute in calculators.items():
        if name in features:
            temperature_features[name] = compute()

    return temperature_features


def extractTempFeaturesFromIntradayData(temperature_intraday_data, features, time_segment, filter_data_by_segment):
    """Compute ``features`` for every segment of ``time_segment``.

    Args:
        temperature_intraday_data: Raw temperature DataFrame.
        features: List of feature names to compute.
        time_segment: Passed through to ``filter_data_by_segment``.
        filter_data_by_segment: Callable invoked as
            ``filter_data_by_segment(data, time_segment)``; its result is
            grouped by ``local_segment``, so it must provide that column.

    Returns:
        DataFrame with a ``local_segment`` column followed by one column per
        feature. It has no rows when there is no data (before or after
        filtering) or when ``features`` is empty.
    """
    no_features = pd.DataFrame(columns=["local_segment"] + features)
    if temperature_intraday_data.empty:
        return no_features

    temperature_intraday_data = filter_data_by_segment(temperature_intraday_data, time_segment)
    # With nothing to compute, resetting the index of a column-less frame would
    # produce an "index" column instead of "local_segment"; keep the schema.
    if temperature_intraday_data.empty or not features:
        return no_features

    # get stats of temperature
    temperature_intraday_features = statsFeatures(temperature_intraday_data, features, pd.DataFrame())
    # The first assigned column brings the "local_segment" index with it;
    # turn that index into a regular column.
    temperature_intraday_features.reset_index(inplace=True)
    return temperature_intraday_features


def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Entry point of the provider: read the sensor CSV and compute features.

    Args:
        sensor_data_files: Mapping whose ``"sensor_data"`` entry is handed to
            ``pandas.read_csv``.
        time_segment: Passed through to ``filter_data_by_segment``.
        provider: Mapping whose ``"FEATURES"`` entry lists the requested
            feature names.
        filter_data_by_segment: Callable used to restrict the data to
            ``time_segment``.
        *args, **kwargs: Accepted and ignored.

    Returns:
        DataFrame produced by ``extractTempFeaturesFromIntradayData``.
    """
    temperature_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])

    requested_intraday_features = set(provider["FEATURES"])
    # name of the features this function can compute ("sumtemp" is implemented
    # by statsFeatures and therefore listed as well)
    base_intraday_features_names = ["sumtemp", "maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp",
                                    "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"]
    # the subset of requested features this function can compute; iterating the
    # base list (not a set intersection) keeps the column order deterministic
    intraday_features_to_compute = [name for name in base_intraday_features_names
                                    if name in requested_intraday_features]

    # extract features from intraday data
    return extractTempFeaturesFromIntradayData(temperature_intraday_data, intraday_features_to_compute,
                                               time_segment, filter_data_by_segment)