diff --git a/Snakefile b/Snakefile index 25e0fc08..8d0870c3 100644 --- a/Snakefile +++ b/Snakefile @@ -317,6 +317,8 @@ for provider in config["EMPATICA_ACCELEROMETER"]["PROVIDERS"].keys(): files_to_compute.extend(expand("data/processed/features/{pid}/empatica_accelerometer.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") + if provider in config["STANDARDIZATION"]["PROVIDERS"] and config["STANDARDIZATION"]["PROVIDERS"][provider]["COMPUTE"]: + files_to_compute.extend(expand("data/interim/{pid}/empatica_accelerometer_features/z_empatica_accelerometer_{language}_{provider_key}_windows.csv", pid=config["PIDS"], language=get_script_language(config["STANDARDIZATION"]["PROVIDERS"][provider]["SRC_SCRIPT"]), provider_key=provider.lower())) for provider in config["EMPATICA_HEARTRATE"]["PROVIDERS"].keys(): if config["EMPATICA_HEARTRATE"]["PROVIDERS"][provider]["COMPUTE"]: @@ -336,6 +338,8 @@ for provider in config["EMPATICA_TEMPERATURE"]["PROVIDERS"].keys(): files_to_compute.extend(expand("data/processed/features/{pid}/empatica_temperature.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") + if provider in config["STANDARDIZATION"]["PROVIDERS"] and config["STANDARDIZATION"]["PROVIDERS"][provider]["COMPUTE"]: + files_to_compute.extend(expand("data/interim/{pid}/empatica_temperature_features/z_empatica_temperature_{language}_{provider_key}_windows.csv", pid=config["PIDS"], language=get_script_language(config["STANDARDIZATION"]["PROVIDERS"][provider]["SRC_SCRIPT"]), provider_key=provider.lower())) for provider in config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"].keys(): if 
config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"][provider]["COMPUTE"]: @@ -345,6 +349,8 @@ for provider in config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"].keys(): files_to_compute.extend(expand("data/processed/features/{pid}/empatica_electrodermal_activity.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") + if provider in config["STANDARDIZATION"]["PROVIDERS"] and config["STANDARDIZATION"]["PROVIDERS"][provider]["COMPUTE"]: + files_to_compute.extend(expand("data/interim/{pid}/empatica_electrodermal_activity_features/z_empatica_electrodermal_activity_{language}_{provider_key}_windows.csv", pid=config["PIDS"], language=get_script_language(config["STANDARDIZATION"]["PROVIDERS"][provider]["SRC_SCRIPT"]), provider_key=provider.lower())) for provider in config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"].keys(): if config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"][provider]["COMPUTE"]: @@ -354,7 +360,8 @@ for provider in config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"].keys(): files_to_compute.extend(expand("data/processed/features/{pid}/empatica_blood_volume_pulse.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") - files_to_compute.extend(expand("data/interim/{pid}/empatica_blood_volume_pulse_features/standardization_empatica_blood_volume_pulse_{language}_{provider_key}_windows.csv", pid=config["PIDS"], language=get_script_language(config["EMPATICA_STANDARDIZATION"]["PROVIDERS"][provider]["SRC_SCRIPT"]), provider_key=provider.lower())) + if provider in config["STANDARDIZATION"]["PROVIDERS"] and config["STANDARDIZATION"]["PROVIDERS"][provider]["COMPUTE"]: + 
files_to_compute.extend(expand("data/interim/{pid}/empatica_blood_volume_pulse_features/z_empatica_blood_volume_pulse_{language}_{provider_key}_windows.csv", pid=config["PIDS"], language=get_script_language(config["STANDARDIZATION"]["PROVIDERS"][provider]["SRC_SCRIPT"]), provider_key=provider.lower())) for provider in config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"].keys(): @@ -365,6 +372,8 @@ for provider in config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"].keys(): files_to_compute.extend(expand("data/processed/features/{pid}/empatica_inter_beat_interval.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") + if provider in config["STANDARDIZATION"]["PROVIDERS"] and config["STANDARDIZATION"]["PROVIDERS"][provider]["COMPUTE"]: + files_to_compute.extend(expand("data/interim/{pid}/empatica_inter_beat_interval_features/z_empatica_inter_beat_interval_{language}_{provider_key}_windows.csv", pid=config["PIDS"], language=get_script_language(config["STANDARDIZATION"]["PROVIDERS"][provider]["SRC_SCRIPT"]), provider_key=provider.lower())) if isinstance(config["EMPATICA_TAGS"]["PROVIDERS"], dict): for provider in config["EMPATICA_TAGS"]["PROVIDERS"].keys(): @@ -404,6 +413,10 @@ for provider in config["ALL_CLEANING_OVERALL"]["PROVIDERS"].keys(): if config["ALL_CLEANING_OVERALL"]["PROVIDERS"][provider]["COMPUTE"]: files_to_compute.extend(expand("data/processed/features/all_participants/all_sensor_features_cleaned_" + provider.lower() +".csv")) +# TODO: add a for loop over the STANDARDIZATION providers here; for every provider with COMPUTE == True, +# merge the standardized features of the enabled sensors into z_all_sensors, both per participant and for all participants. +# For the underlying logic, see how each sensor block above lists "data/processed/features/all_participants/all_sensor_features.csv". + rule all: input: files_to_compute diff --git a/config.yaml b/config.yaml index 
432b1364..7f235b12 100644 --- a/config.yaml +++ b/config.yaml @@ -484,7 +484,7 @@ EMPATICA_ACCELEROMETER: FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"] SRC_SCRIPT: src/features/empatica_accelerometer/dbdp/main.py CR: - COMPUTE: False + COMPUTE: True FEATURES: ["totalMagnitudeBand", "absoluteMeanBand", "varianceBand"] # Acc features WINDOWS: COMPUTE: True @@ -512,7 +512,7 @@ EMPATICA_TEMPERATURE: FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"] SRC_SCRIPT: src/features/empatica_temperature/dbdp/main.py CR: - COMPUTE: False + COMPUTE: True FEATURES: ["maximum", "minimum", "meanAbsChange", "longestStrikeAboveMean", "longestStrikeBelowMean", "stdDev", "median", "meanChange", "sumSquared", "squareSumOfComponent", "sumOfSquareComponents"] WINDOWS: @@ -531,7 +531,7 @@ EMPATICA_ELECTRODERMAL_ACTIVITY: FEATURES: ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", "diffminmodeeda", "entropyeda"] SRC_SCRIPT: src/features/empatica_electrodermal_activity/dbdp/main.py CR: - COMPUTE: False + COMPUTE: True FEATURES: ['mean', 'std', 'q25', 'q75', 'qd', 'deriv', 'power', 'numPeaks', 'ratePeaks', 'powerPeaks', 'sumPosDeriv', 'propPosDeriv', 'derivTonic', 'sigTonicDifference', 'freqFeats','maxPeakAmplitudeChangeBefore', 'maxPeakAmplitudeChangeAfter', 'avgPeakAmplitudeChangeBefore', 'avgPeakAmplitudeChangeAfter', 'avgPeakChangeRatio', 'maxPeakIncreaseTime', 'maxPeakDecreaseTime', 'maxPeakDuration', 'maxPeakChangeRatio', @@ -559,7 +559,7 @@ EMPATICA_BLOOD_VOLUME_PULSE: WINDOWS: COMPUTE: True WINDOW_LENGTH: 300 # specify window length in seconds - SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'nlargest_mean', 'nsmallest_mean', 'count_windows', 'hrv_num_windows_non_zero'] + SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'nlargest_mean', 'nsmallest_mean', 'count_windows', 'hrv_num_windows_non_nan'] STANDARDIZE_FEATURES: False 
SRC_SCRIPT: src/features/empatica_blood_volume_pulse/cr/main.py @@ -572,14 +572,14 @@ EMPATICA_INTER_BEAT_INTERVAL: FEATURES: ["maxibi", "minibi", "avgibi", "medianibi", "modeibi", "stdibi", "diffmaxmodeibi", "diffminmodeibi", "entropyibi"] SRC_SCRIPT: src/features/empatica_inter_beat_interval/dbdp/main.py CR: - COMPUTE: False + COMPUTE: True FEATURES: ['meanHr', 'ibi', 'sdnn', 'sdsd', 'rmssd', 'pnn20', 'pnn50', 'sd', 'sd2', 'sd1/sd2', 'numRR', # Time features 'VLF', 'LF', 'LFnorm', 'HF', 'HFnorm', 'LF/HF', 'fullIntegral'] # Freq features PATCH_WITH_BVP: True WINDOWS: COMPUTE: True WINDOW_LENGTH: 300 # specify window length in seconds - SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'nlargest_mean', 'nsmallest_mean', 'count_windows', 'hrv_num_windows_non_zero'] + SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'nlargest_mean', 'nsmallest_mean', 'count_windows', 'hrv_num_windows_non_nan'] STANDARDIZE_FEATURES: False SRC_SCRIPT: src/features/empatica_inter_beat_interval/cr/main.py @@ -669,11 +669,7 @@ ALL_CLEANING_OVERALL: # Z-score standardization # ######################################################################################################################## STANDARDIZATION: - COMPUTE: True - EXCECUTE_FULL_PIPELINE: False # Standardization to be calculated from feature extraction step including merging all sensors and participants steps (in seperate standardization file) -EMPATICA_STANDARDIZATION: PROVIDERS: CR: - STANDARDIZE: True - TYPE: FROM_FIRST_ORDER # FROM_FIRST_ORDER or FROM_SECOND_ORDER(not implemented) - SRC_SCRIPT: src/features/standardization/empatica_standardization/main.py + COMPUTE: True + SRC_SCRIPT: src/features/standardization/main.py diff --git a/rules/features.smk b/rules/features.smk index 6a1e0a97..0f8f4d73 100644 --- a/rules/features.smk +++ b/rules/features.smk @@ -775,6 +775,20 @@ rule empatica_accelerometer_python_features: script: "../src/features/entry.py" +rule empatica_accelerometer_python_features_standardization: + input: + 
windows_features_data = "data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_python_{provider_key}_windows.csv" + params: + provider = config["STANDARDIZATION"]["PROVIDERS"]["CR"], + provider_key = "{provider_key}", + sensor_key = "empatica_accelerometer", + provider_main = config["EMPATICA_ACCELEROMETER"]["PROVIDERS"]["CR"] + output: + "data/interim/{pid}/empatica_accelerometer_features/z_empatica_accelerometer_python_{provider_key}.csv", + "data/interim/{pid}/empatica_accelerometer_features/z_empatica_accelerometer_python_{provider_key}_windows.csv" + script: + "../src/features/standardization/main.py" + rule empatica_accelerometer_r_features: input: sensor_data = "data/raw/{pid}/empatica_accelerometer_with_datetime.csv", @@ -829,6 +843,20 @@ rule empatica_temperature_python_features: script: "../src/features/entry.py" +rule empatica_temperature_python_features_standardization: + input: + windows_features_data = "data/interim/{pid}/empatica_temperature_features/empatica_temperature_python_{provider_key}_windows.csv" + params: + provider = config["STANDARDIZATION"]["PROVIDERS"]["CR"], + provider_key = "{provider_key}", + sensor_key = "empatica_temperature", + provider_main = config["EMPATICA_TEMPERATURE"]["PROVIDERS"]["CR"] + output: + "data/interim/{pid}/empatica_temperature_features/z_empatica_temperature_python_{provider_key}.csv", + "data/interim/{pid}/empatica_temperature_features/z_empatica_temperature_python_{provider_key}_windows.csv" + script: + "../src/features/standardization/main.py" + rule empatica_temperature_r_features: input: sensor_data = "data/raw/{pid}/empatica_temperature_with_datetime.csv", @@ -856,6 +884,20 @@ rule empatica_electrodermal_activity_python_features: script: "../src/features/entry.py" +rule empatica_electrodermal_activity_python_features_standardization: + input: + windows_features_data = 
"data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_{provider_key}_windows.csv" + params: + provider = config["STANDARDIZATION"]["PROVIDERS"]["CR"], + provider_key = "{provider_key}", + sensor_key = "empatica_electrodermal_activity", + provider_main = config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"]["CR"] + output: + "data/interim/{pid}/empatica_electrodermal_activity_features/z_empatica_electrodermal_activity_python_{provider_key}.csv", + "data/interim/{pid}/empatica_electrodermal_activity_features/z_empatica_electrodermal_activity_python_{provider_key}_windows.csv" + script: + "../src/features/standardization/main.py" + rule empatica_electrodermal_activity_r_features: input: sensor_data = "data/raw/{pid}/empatica_electrodermal_activity_with_datetime.csv", @@ -883,6 +925,20 @@ rule empatica_blood_volume_pulse_python_features: script: "../src/features/entry.py" +rule empatica_blood_volume_pulse_python_cr_features_standardization: + input: + windows_features_data = "data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}_windows.csv" + params: + provider = config["STANDARDIZATION"]["PROVIDERS"]["CR"], + provider_key = "{provider_key}", + sensor_key = "empatica_blood_volume_pulse", + provider_main = config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"]["CR"] + output: + "data/interim/{pid}/empatica_blood_volume_pulse_features/z_empatica_blood_volume_pulse_python_{provider_key}.csv", + "data/interim/{pid}/empatica_blood_volume_pulse_features/z_empatica_blood_volume_pulse_python_{provider_key}_windows.csv" + script: + "../src/features/standardization/main.py" + rule empatica_blood_volume_pulse_r_features: input: sensor_data = "data/raw/{pid}/empatica_blood_volume_pulse_with_datetime.csv", @@ -896,20 +952,6 @@ rule empatica_blood_volume_pulse_r_features: script: "../src/features/entry.R" -rule empatica_blood_volume_pulse_python_cr_features_standardization: - input: - 
windows_features_data = "data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}_windows.csv" - params: - provider = config["EMPATICA_STANDARDIZATION"]["PROVIDERS"]["CR"], - provider_key = "{provider_key}", - sensor_key = "empatica_blood_volume_pulse", - provider_main = config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"]["CR"] - output: - "data/interim/{pid}/empatica_blood_volume_pulse_features/standardization_empatica_blood_volume_pulse_python_{provider_key}.csv", - "data/interim/{pid}/empatica_blood_volume_pulse_features/standardization_empatica_blood_volume_pulse_python_{provider_key}_windows.csv" - script: - "../src/features/standardization/empatica_standardization/main.py" - rule empatica_inter_beat_interval_python_features: input: sensor_data = "data/raw/{pid}/empatica_inter_beat_interval_with_datetime.csv", @@ -924,6 +966,20 @@ rule empatica_inter_beat_interval_python_features: script: "../src/features/entry.py" +rule empatica_inter_beat_interval_python_features_standardization: + input: + windows_features_data = "data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_{provider_key}_windows.csv" + params: + provider = config["STANDARDIZATION"]["PROVIDERS"]["CR"], + provider_key = "{provider_key}", + sensor_key = "empatica_inter_beat_interval", + provider_main = config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"]["CR"] + output: + "data/interim/{pid}/empatica_inter_beat_interval_features/z_empatica_inter_beat_interval_python_{provider_key}.csv", + "data/interim/{pid}/empatica_inter_beat_interval_features/z_empatica_inter_beat_interval_python_{provider_key}_windows.csv" + script: + "../src/features/standardization/main.py" + rule empatica_inter_beat_interval_r_features: input: sensor_data = "data/raw/{pid}/empatica_inter_beat_interval_with_datetime.csv", diff --git a/src/features/standardization/empatica_standardization/main.py b/src/features/standardization/main.py 
similarity index 78% rename from src/features/standardization/empatica_standardization/main.py rename to src/features/standardization/main.py index 2e2fb0f6..b6026d0b 100644 --- a/src/features/standardization/empatica_standardization/main.py +++ b/src/features/standardization/main.py @@ -4,9 +4,6 @@ from sklearn.preprocessing import StandardScaler import sys -sys.path.append('/rapids/src/features/') -from cr_features_helper_methods import extract_second_order_features - sensor_data_files = dict(snakemake.input) provider = snakemake.params["provider"] @@ -16,12 +13,14 @@ sensor_key = snakemake.params["sensor_key"] pd.set_option('display.max_columns', None) if provider_key == "cr": + sys.path.append('/rapids/src/features/') + from cr_features_helper_methods import extract_second_order_features + provider_main = snakemake.params["provider_main"] prefix = sensor_key + "_" + provider_key + "_" windows_features_data = pd.read_csv(sensor_data_files["windows_features_data"]) - excluded_columns = ['local_segment', 'local_segment_label', 'local_segment_start_datetime', 'local_segment_end_datetime', - prefix + "level_1"] + excluded_columns = ['local_segment', 'local_segment_label', 'local_segment_start_datetime', 'local_segment_end_datetime', prefix + "level_1"] windows_features_data.loc[:, ~windows_features_data.columns.isin(excluded_columns)] = \ StandardScaler().fit_transform(windows_features_data.loc[:, ~windows_features_data.columns.isin(excluded_columns)]) @@ -30,4 +29,9 @@ if provider_key == "cr": if provider_main["WINDOWS"]["COMPUTE"] and "SECOND_ORDER_FEATURES" in provider_main["WINDOWS"]: so_features_names = provider_main["WINDOWS"]["SECOND_ORDER_FEATURES"] windows_so_features_data = extract_second_order_features(windows_features_data, so_features_names, prefix) - windows_so_features_data.to_csv(snakemake.output[0], index=False) \ No newline at end of file + windows_so_features_data.to_csv(snakemake.output[0], index=False) + else: + 
pd.DataFrame().to_csv(snakemake.output[0], index=False) + +else: + pass # TODO: implement standardization for the remaining providers (only "cr" is handled so far). \ No newline at end of file