Standardization enabled as a Snakefile rule for all E4 sensors.
parent
402059871f
commit
9ea39dc557
15
Snakefile
15
Snakefile
|
@ -317,6 +317,8 @@ for provider in config["EMPATICA_ACCELEROMETER"]["PROVIDERS"].keys():
|
|||
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_accelerometer.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
if provider in config["STANDARDIZATION"]["PROVIDERS"] and config["STANDARDIZATION"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/interim/{pid}/empatica_accelerometer_features/z_empatica_accelerometer_{language}_{provider_key}_windows.csv", pid=config["PIDS"], language=get_script_language(config["STANDARDIZATION"]["PROVIDERS"][provider]["SRC_SCRIPT"]), provider_key=provider.lower()))
|
||||
|
||||
for provider in config["EMPATICA_HEARTRATE"]["PROVIDERS"].keys():
|
||||
if config["EMPATICA_HEARTRATE"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
|
@ -336,6 +338,8 @@ for provider in config["EMPATICA_TEMPERATURE"]["PROVIDERS"].keys():
|
|||
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_temperature.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
if provider in config["STANDARDIZATION"]["PROVIDERS"] and config["STANDARDIZATION"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/interim/{pid}/empatica_temperature_features/z_empatica_temperature_{language}_{provider_key}_windows.csv", pid=config["PIDS"], language=get_script_language(config["STANDARDIZATION"]["PROVIDERS"][provider]["SRC_SCRIPT"]), provider_key=provider.lower()))
|
||||
|
||||
for provider in config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"].keys():
|
||||
if config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
|
@ -345,6 +349,8 @@ for provider in config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"].keys():
|
|||
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_electrodermal_activity.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
if provider in config["STANDARDIZATION"]["PROVIDERS"] and config["STANDARDIZATION"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/interim/{pid}/empatica_electrodermal_activity_features/z_empatica_electrodermal_activity_{language}_{provider_key}_windows.csv", pid=config["PIDS"], language=get_script_language(config["STANDARDIZATION"]["PROVIDERS"][provider]["SRC_SCRIPT"]), provider_key=provider.lower()))
|
||||
|
||||
for provider in config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"].keys():
|
||||
if config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
|
@ -354,7 +360,8 @@ for provider in config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"].keys():
|
|||
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_blood_volume_pulse.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
files_to_compute.extend(expand("data/interim/{pid}/empatica_blood_volume_pulse_features/standardization_empatica_blood_volume_pulse_{language}_{provider_key}_windows.csv", pid=config["PIDS"], language=get_script_language(config["EMPATICA_STANDARDIZATION"]["PROVIDERS"][provider]["SRC_SCRIPT"]), provider_key=provider.lower()))
|
||||
if provider in config["STANDARDIZATION"]["PROVIDERS"] and config["STANDARDIZATION"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/interim/{pid}/empatica_blood_volume_pulse_features/z_empatica_blood_volume_pulse_{language}_{provider_key}_windows.csv", pid=config["PIDS"], language=get_script_language(config["STANDARDIZATION"]["PROVIDERS"][provider]["SRC_SCRIPT"]), provider_key=provider.lower()))
|
||||
|
||||
|
||||
for provider in config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"].keys():
|
||||
|
@ -365,6 +372,8 @@ for provider in config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"].keys():
|
|||
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_inter_beat_interval.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
if provider in config["STANDARDIZATION"]["PROVIDERS"] and config["STANDARDIZATION"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/interim/{pid}/empatica_inter_beat_interval_features/z_empatica_inter_beat_interval_{language}_{provider_key}_windows.csv", pid=config["PIDS"], language=get_script_language(config["STANDARDIZATION"]["PROVIDERS"][provider]["SRC_SCRIPT"]), provider_key=provider.lower()))
|
||||
|
||||
if isinstance(config["EMPATICA_TAGS"]["PROVIDERS"], dict):
|
||||
for provider in config["EMPATICA_TAGS"]["PROVIDERS"].keys():
|
||||
|
@ -404,6 +413,10 @@ for provider in config["ALL_CLEANING_OVERALL"]["PROVIDERS"].keys():
|
|||
if config["ALL_CLEANING_OVERALL"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/processed/features/all_participants/all_sensor_features_cleaned_" + provider.lower() +".csv"))
|
||||
|
||||
# Put a for loop over the STANDARDIZATION providers if all of them have COMPUTE == True,
|
||||
# then merge all those that are set to True into z_all_sensors, for each participant and for all participants.
|
||||
# For the underlying logic, note that each sensor lists "data/processed/features/all_participants/all_sensor_features.csv".
|
||||
|
||||
rule all:
|
||||
input:
|
||||
files_to_compute
|
||||
|
|
20
config.yaml
20
config.yaml
|
@ -484,7 +484,7 @@ EMPATICA_ACCELEROMETER:
|
|||
FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
|
||||
SRC_SCRIPT: src/features/empatica_accelerometer/dbdp/main.py
|
||||
CR:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
FEATURES: ["totalMagnitudeBand", "absoluteMeanBand", "varianceBand"] # Acc features
|
||||
WINDOWS:
|
||||
COMPUTE: True
|
||||
|
@ -512,7 +512,7 @@ EMPATICA_TEMPERATURE:
|
|||
FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"]
|
||||
SRC_SCRIPT: src/features/empatica_temperature/dbdp/main.py
|
||||
CR:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
FEATURES: ["maximum", "minimum", "meanAbsChange", "longestStrikeAboveMean", "longestStrikeBelowMean",
|
||||
"stdDev", "median", "meanChange", "sumSquared", "squareSumOfComponent", "sumOfSquareComponents"]
|
||||
WINDOWS:
|
||||
|
@ -531,7 +531,7 @@ EMPATICA_ELECTRODERMAL_ACTIVITY:
|
|||
FEATURES: ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", "diffminmodeeda", "entropyeda"]
|
||||
SRC_SCRIPT: src/features/empatica_electrodermal_activity/dbdp/main.py
|
||||
CR:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
FEATURES: ['mean', 'std', 'q25', 'q75', 'qd', 'deriv', 'power', 'numPeaks', 'ratePeaks', 'powerPeaks', 'sumPosDeriv', 'propPosDeriv', 'derivTonic',
|
||||
'sigTonicDifference', 'freqFeats','maxPeakAmplitudeChangeBefore', 'maxPeakAmplitudeChangeAfter', 'avgPeakAmplitudeChangeBefore',
|
||||
'avgPeakAmplitudeChangeAfter', 'avgPeakChangeRatio', 'maxPeakIncreaseTime', 'maxPeakDecreaseTime', 'maxPeakDuration', 'maxPeakChangeRatio',
|
||||
|
@ -559,7 +559,7 @@ EMPATICA_BLOOD_VOLUME_PULSE:
|
|||
WINDOWS:
|
||||
COMPUTE: True
|
||||
WINDOW_LENGTH: 300 # specify window length in seconds
|
||||
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'nlargest_mean', 'nsmallest_mean', 'count_windows', 'hrv_num_windows_non_zero']
|
||||
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'nlargest_mean', 'nsmallest_mean', 'count_windows', 'hrv_num_windows_non_nan']
|
||||
STANDARDIZE_FEATURES: False
|
||||
SRC_SCRIPT: src/features/empatica_blood_volume_pulse/cr/main.py
|
||||
|
||||
|
@ -572,14 +572,14 @@ EMPATICA_INTER_BEAT_INTERVAL:
|
|||
FEATURES: ["maxibi", "minibi", "avgibi", "medianibi", "modeibi", "stdibi", "diffmaxmodeibi", "diffminmodeibi", "entropyibi"]
|
||||
SRC_SCRIPT: src/features/empatica_inter_beat_interval/dbdp/main.py
|
||||
CR:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
FEATURES: ['meanHr', 'ibi', 'sdnn', 'sdsd', 'rmssd', 'pnn20', 'pnn50', 'sd', 'sd2', 'sd1/sd2', 'numRR', # Time features
|
||||
'VLF', 'LF', 'LFnorm', 'HF', 'HFnorm', 'LF/HF', 'fullIntegral'] # Freq features
|
||||
PATCH_WITH_BVP: True
|
||||
WINDOWS:
|
||||
COMPUTE: True
|
||||
WINDOW_LENGTH: 300 # specify window length in seconds
|
||||
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'nlargest_mean', 'nsmallest_mean', 'count_windows', 'hrv_num_windows_non_zero']
|
||||
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'nlargest_mean', 'nsmallest_mean', 'count_windows', 'hrv_num_windows_non_nan']
|
||||
STANDARDIZE_FEATURES: False
|
||||
SRC_SCRIPT: src/features/empatica_inter_beat_interval/cr/main.py
|
||||
|
||||
|
@ -669,11 +669,7 @@ ALL_CLEANING_OVERALL:
|
|||
# Z-score standardization #
|
||||
########################################################################################################################
|
||||
STANDARDIZATION:
|
||||
COMPUTE: True
|
||||
EXCECUTE_FULL_PIPELINE: False # Standardization is calculated starting from the feature extraction step, including the steps that merge all sensors and participants (in a separate standardization file)
|
||||
EMPATICA_STANDARDIZATION:
|
||||
PROVIDERS:
|
||||
CR:
|
||||
STANDARDIZE: True
|
||||
TYPE: FROM_FIRST_ORDER # FROM_FIRST_ORDER or FROM_SECOND_ORDER(not implemented)
|
||||
SRC_SCRIPT: src/features/standardization/empatica_standardization/main.py
|
||||
COMPUTE: True
|
||||
SRC_SCRIPT: src/features/standardization/main.py
|
||||
|
|
|
@ -775,6 +775,20 @@ rule empatica_accelerometer_python_features:
|
|||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule empatica_accelerometer_python_features_standardization:
|
||||
input:
|
||||
windows_features_data = "data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_python_{provider_key}_windows.csv"
|
||||
params:
|
||||
provider = config["STANDARDIZATION"]["PROVIDERS"]["CR"],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "empatica_accelerometer",
|
||||
provider_main = config["EMPATICA_ACCELEROMETER"]["PROVIDERS"]["CR"]
|
||||
output:
|
||||
"data/interim/{pid}/empatica_accelerometer_features/z_empatica_accelerometer_python_{provider_key}.csv",
|
||||
"data/interim/{pid}/empatica_accelerometer_features/z_empatica_accelerometer_python_{provider_key}_windows.csv"
|
||||
script:
|
||||
"../src/features/standardization/main.py"
|
||||
|
||||
rule empatica_accelerometer_r_features:
|
||||
input:
|
||||
sensor_data = "data/raw/{pid}/empatica_accelerometer_with_datetime.csv",
|
||||
|
@ -829,6 +843,20 @@ rule empatica_temperature_python_features:
|
|||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule empatica_temperature_python_features_standardization:
|
||||
input:
|
||||
windows_features_data = "data/interim/{pid}/empatica_temperature_features/empatica_temperature_python_{provider_key}_windows.csv"
|
||||
params:
|
||||
provider = config["STANDARDIZATION"]["PROVIDERS"]["CR"],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "empatica_temperature",
|
||||
provider_main = config["EMPATICA_TEMPERATURE"]["PROVIDERS"]["CR"]
|
||||
output:
|
||||
"data/interim/{pid}/empatica_temperature_features/z_empatica_temperature_python_{provider_key}.csv",
|
||||
"data/interim/{pid}/empatica_temperature_features/z_empatica_temperature_python_{provider_key}_windows.csv"
|
||||
script:
|
||||
"../src/features/standardization/main.py"
|
||||
|
||||
rule empatica_temperature_r_features:
|
||||
input:
|
||||
sensor_data = "data/raw/{pid}/empatica_temperature_with_datetime.csv",
|
||||
|
@ -856,6 +884,20 @@ rule empatica_electrodermal_activity_python_features:
|
|||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule empatica_electrodermal_activity_python_features_standardization:
|
||||
input:
|
||||
windows_features_data = "data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_{provider_key}_windows.csv"
|
||||
params:
|
||||
provider = config["STANDARDIZATION"]["PROVIDERS"]["CR"],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "empatica_electrodermal_activity",
|
||||
provider_main = config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"]["CR"]
|
||||
output:
|
||||
"data/interim/{pid}/empatica_electrodermal_activity_features/z_empatica_electrodermal_activity_python_{provider_key}.csv",
|
||||
"data/interim/{pid}/empatica_electrodermal_activity_features/z_empatica_electrodermal_activity_python_{provider_key}_windows.csv"
|
||||
script:
|
||||
"../src/features/standardization/main.py"
|
||||
|
||||
rule empatica_electrodermal_activity_r_features:
|
||||
input:
|
||||
sensor_data = "data/raw/{pid}/empatica_electrodermal_activity_with_datetime.csv",
|
||||
|
@ -883,6 +925,20 @@ rule empatica_blood_volume_pulse_python_features:
|
|||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule empatica_blood_volume_pulse_python_cr_features_standardization:
|
||||
input:
|
||||
windows_features_data = "data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}_windows.csv"
|
||||
params:
|
||||
provider = config["STANDARDIZATION"]["PROVIDERS"]["CR"],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "empatica_blood_volume_pulse",
|
||||
provider_main = config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"]["CR"]
|
||||
output:
|
||||
"data/interim/{pid}/empatica_blood_volume_pulse_features/z_empatica_blood_volume_pulse_python_{provider_key}.csv",
|
||||
"data/interim/{pid}/empatica_blood_volume_pulse_features/z_empatica_blood_volume_pulse_python_{provider_key}_windows.csv"
|
||||
script:
|
||||
"../src/features/standardization/main.py"
|
||||
|
||||
rule empatica_blood_volume_pulse_r_features:
|
||||
input:
|
||||
sensor_data = "data/raw/{pid}/empatica_blood_volume_pulse_with_datetime.csv",
|
||||
|
@ -896,20 +952,6 @@ rule empatica_blood_volume_pulse_r_features:
|
|||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule empatica_blood_volume_pulse_python_cr_features_standardization:
|
||||
input:
|
||||
windows_features_data = "data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}_windows.csv"
|
||||
params:
|
||||
provider = config["EMPATICA_STANDARDIZATION"]["PROVIDERS"]["CR"],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "empatica_blood_volume_pulse",
|
||||
provider_main = config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"]["CR"]
|
||||
output:
|
||||
"data/interim/{pid}/empatica_blood_volume_pulse_features/standardization_empatica_blood_volume_pulse_python_{provider_key}.csv",
|
||||
"data/interim/{pid}/empatica_blood_volume_pulse_features/standardization_empatica_blood_volume_pulse_python_{provider_key}_windows.csv"
|
||||
script:
|
||||
"../src/features/standardization/empatica_standardization/main.py"
|
||||
|
||||
rule empatica_inter_beat_interval_python_features:
|
||||
input:
|
||||
sensor_data = "data/raw/{pid}/empatica_inter_beat_interval_with_datetime.csv",
|
||||
|
@ -924,6 +966,20 @@ rule empatica_inter_beat_interval_python_features:
|
|||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule empatica_inter_beat_interval_python_features_standardization:
|
||||
input:
|
||||
windows_features_data = "data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_{provider_key}_windows.csv"
|
||||
params:
|
||||
provider = config["STANDARDIZATION"]["PROVIDERS"]["CR"],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "empatica_inter_beat_interval",
|
||||
provider_main = config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"]["CR"]
|
||||
output:
|
||||
"data/interim/{pid}/empatica_inter_beat_interval_features/z_empatica_inter_beat_interval_python_{provider_key}.csv",
|
||||
"data/interim/{pid}/empatica_inter_beat_interval_features/z_empatica_inter_beat_interval_python_{provider_key}_windows.csv"
|
||||
script:
|
||||
"../src/features/standardization/main.py"
|
||||
|
||||
rule empatica_inter_beat_interval_r_features:
|
||||
input:
|
||||
sensor_data = "data/raw/{pid}/empatica_inter_beat_interval_with_datetime.csv",
|
||||
|
|
|
@ -4,9 +4,6 @@ from sklearn.preprocessing import StandardScaler
|
|||
|
||||
import sys
|
||||
|
||||
sys.path.append('/rapids/src/features/')
|
||||
from cr_features_helper_methods import extract_second_order_features
|
||||
|
||||
sensor_data_files = dict(snakemake.input)
|
||||
|
||||
provider = snakemake.params["provider"]
|
||||
|
@ -16,12 +13,14 @@ sensor_key = snakemake.params["sensor_key"]
|
|||
pd.set_option('display.max_columns', None)
|
||||
|
||||
if provider_key == "cr":
|
||||
sys.path.append('/rapids/src/features/')
|
||||
from cr_features_helper_methods import extract_second_order_features
|
||||
|
||||
provider_main = snakemake.params["provider_main"]
|
||||
prefix = sensor_key + "_" + provider_key + "_"
|
||||
|
||||
windows_features_data = pd.read_csv(sensor_data_files["windows_features_data"])
|
||||
excluded_columns = ['local_segment', 'local_segment_label', 'local_segment_start_datetime', 'local_segment_end_datetime',
|
||||
prefix + "level_1"]
|
||||
excluded_columns = ['local_segment', 'local_segment_label', 'local_segment_start_datetime', 'local_segment_end_datetime', prefix + "level_1"]
|
||||
windows_features_data.loc[:, ~windows_features_data.columns.isin(excluded_columns)] = \
|
||||
StandardScaler().fit_transform(windows_features_data.loc[:, ~windows_features_data.columns.isin(excluded_columns)])
|
||||
|
||||
|
@ -30,4 +29,9 @@ if provider_key == "cr":
|
|||
if provider_main["WINDOWS"]["COMPUTE"] and "SECOND_ORDER_FEATURES" in provider_main["WINDOWS"]:
|
||||
so_features_names = provider_main["WINDOWS"]["SECOND_ORDER_FEATURES"]
|
||||
windows_so_features_data = extract_second_order_features(windows_features_data, so_features_names, prefix)
|
||||
windows_so_features_data.to_csv(snakemake.output[0], index=False)
|
||||
windows_so_features_data.to_csv(snakemake.output[0], index=False)
|
||||
else:
|
||||
pd.DataFrame().to_csv(snakemake.output[0], index=False)
|
||||
|
||||
else:
|
||||
pass  # TODO: handle the rest of the sensors.
|
Loading…
Reference in New Issue