diff --git a/config.yaml b/config.yaml index 77d36d68..12e70b42 100644 --- a/config.yaml +++ b/config.yaml @@ -481,7 +481,7 @@ EMPATICA_ACCELEROMETER: FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"] SRC_SCRIPT: src/features/empatica_accelerometer/dbdp/main.py CR: - COMPUTE: True + COMPUTE: False FEATURES: ["fqHighestPeakFreqs", "fqHighestPeaks", "fqEnergyFeat", "fqEntropyFeat", "fqHistogramBins","fqAbsMean", "fqSkewness", "fqKurtosis", "fqInterquart", # Freq features "meanLow", "areaLow", "totalAbsoluteAreaBand", "totalMagnitudeBand", "entropyBand", "skewnessBand", "kurtosisBand", "postureDistanceLow", "absoluteMeanBand", "absoluteAreaBand", "quartilesBand", "interQuartileRangeBand", "varianceBand", @@ -510,7 +510,7 @@ EMPATICA_TEMPERATURE: CONTAINER: TEMP PROVIDERS: DBDP: - COMPUTE: False + COMPUTE: True FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"] SRC_SCRIPT: src/features/empatica_temperature/dbdp/main.py CR: @@ -521,7 +521,7 @@ EMPATICA_TEMPERATURE: "calcMeanCrossingRateAutocorr", "countAboveMeanAutocorr", "sumPer", "sumSquared", "squareSumOfComponent", "sumOfSquareComponents"] WINDOWS: - COMPUTE: False + COMPUTE: True WINDOW_LENGTH: 90 # specify window length in seconds SRC_SCRIPT: src/features/empatica_temperature/cr/main.py @@ -530,7 +530,7 @@ EMPATICA_ELECTRODERMAL_ACTIVITY: CONTAINER: EDA PROVIDERS: DBDP: - COMPUTE: False + COMPUTE: True FEATURES: ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", "diffminmodeeda", "entropyeda"] SRC_SCRIPT: src/features/empatica_electrodermal_activity/dbdp/main.py CR: @@ -541,7 +541,7 @@ EMPATICA_ELECTRODERMAL_ACTIVITY: 'avgPeakIncreaseTime', 'avgPeakDecreaseTime', 'avgPeakDuration', 'maxPeakResponseSlopeBefore', 'maxPeakResponseSlopeAfter', 'signalOverallChange', 'changeDuration', 'changeRate', 'significantIncrease', 'significantDecrease'] WINDOWS: - COMPUTE: False + COMPUTE: True WINDOW_LENGTH: 80 # specify window length in seconds SRC_SCRIPT: src/features/empatica_electrodermal_activity/cr/main.py @@ -550,7 +550,7 @@ EMPATICA_BLOOD_VOLUME_PULSE: CONTAINER: BVP PROVIDERS: DBDP: - COMPUTE: False + COMPUTE: True FEATURES: ["fqHighestPeakFreqs", "fqHighestPeaks", "fqEnergyFeat", "fqEntropyFeat", "fqHistogramBins","fqAbsMean", "fqSkewness", "fqKurtosis", "fqInterquart", # Freq features "maxbvp", "minbvp", "avgbvp", "medianbvp", "modebvp", "stdbvp", "diffmaxmodebvp", "diffminmodebvp", "entropybvp"] # HRV features SRC_SCRIPT: src/features/empatica_blood_volume_pulse/dbdp/main.py @@ -558,7 +558,7 @@ EMPATICA_BLOOD_VOLUME_PULSE: COMPUTE: True FEATURES: ['meanHr', 'ibi', 'sdnn', 'sdsd', 'rmssd', 'pnn20', 'pnn50', 'sd', 'sd2', 'sd1/sd2', 'numRR'] WINDOWS: - COMPUTE: False + COMPUTE: True WINDOW_LENGTH: 4 # specify window length in seconds SRC_SCRIPT: src/features/empatica_blood_volume_pulse/cr/main.py diff --git a/rules/features.smk b/rules/features.smk index f0fea945..1b6e0ad8 100644 --- a/rules/features.smk +++ b/rules/features.smk @@ -770,7 +770,8 @@ rule empatica_accelerometer_python_features: provider_key = "{provider_key}", sensor_key = "empatica_accelerometer" output: - "data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_python_{provider_key}.csv" + "data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_python_{provider_key}.csv", + "data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_python_{provider_key}_windows.csv" script: "../src/features/entry.py" @@ -796,7 +797,8 @@ rule empatica_heartrate_python_features: provider_key = "{provider_key}", sensor_key = "empatica_heartrate" output: - "data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_python_{provider_key}.csv" + "data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_python_{provider_key}.csv", + "data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_python_{provider_key}_windows.csv" script: "../src/features/entry.py" @@ -822,7 +824,8 @@ rule empatica_temperature_python_features: provider_key = "{provider_key}", sensor_key = "empatica_temperature" output: - "data/interim/{pid}/empatica_temperature_features/empatica_temperature_python_{provider_key}.csv" + "data/interim/{pid}/empatica_temperature_features/empatica_temperature_python_{provider_key}.csv", + "data/interim/{pid}/empatica_temperature_features/empatica_temperature_python_{provider_key}_windows.csv" script: "../src/features/entry.py" @@ -848,7 +851,8 @@ rule empatica_electrodermal_activity_python_features: provider_key = "{provider_key}", sensor_key = "empatica_electrodermal_activity" output: - "data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_{provider_key}.csv" + "data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_{provider_key}.csv", + "data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_{provider_key}_windows.csv" script: "../src/features/entry.py" @@ -874,7 +878,8 @@ rule empatica_blood_volume_pulse_python_features: provider_key = "{provider_key}", sensor_key = "empatica_blood_volume_pulse" output: - "data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}.csv" + "data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}.csv", + "data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}_windows.csv" script: "../src/features/entry.py" @@ -900,7 +905,8 @@ rule empatica_inter_beat_interval_python_features: provider_key = "{provider_key}", sensor_key = "empatica_inter_beat_interval" output: - "data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_{provider_key}.csv" + "data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_{provider_key}.csv", + "data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_{provider_key}_windows.csv" script: "../src/features/entry.py" diff --git a/src/features/empatica_accelerometer/cr/main.py b/src/features/empatica_accelerometer/cr/main.py index 43812eb8..9618a88c 100644 --- a/src/features/empatica_accelerometer/cr/main.py +++ b/src/features/empatica_accelerometer/cr/main.py @@ -53,7 +53,9 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen requested_intraday_features = provider["FEATURES"] - if provider["WINDOWS"]["COMPUTE"]: + calc_windows = kwargs.get('calc_windows', False) + + if provider["WINDOWS"]["COMPUTE"] and calc_windows: requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"] else: requested_window_length = None diff --git a/src/features/empatica_blood_volume_pulse/cr/main.py b/src/features/empatica_blood_volume_pulse/cr/main.py index 8b7f0c81..6d8afcd4 100644 --- a/src/features/empatica_blood_volume_pulse/cr/main.py +++ b/src/features/empatica_blood_volume_pulse/cr/main.py @@ -47,7 +47,9 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen requested_intraday_features = provider["FEATURES"] - if provider["WINDOWS"]["COMPUTE"]: + calc_windows = kwargs.get('calc_windows', False) + + if provider["WINDOWS"]["COMPUTE"] and calc_windows: requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"] else: requested_window_length = None diff --git a/src/features/empatica_electrodermal_activity/cr/main.py b/src/features/empatica_electrodermal_activity/cr/main.py index c4f11349..b621f040 100644 --- a/src/features/empatica_electrodermal_activity/cr/main.py +++ b/src/features/empatica_electrodermal_activity/cr/main.py @@ -46,7 +46,9 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen requested_intraday_features = provider["FEATURES"] - if provider["WINDOWS"]["COMPUTE"]: + calc_windows = kwargs.get('calc_windows', False) + + if provider["WINDOWS"]["COMPUTE"] and calc_windows: requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"] else: requested_window_length = None diff --git a/src/features/empatica_temperature/cr/main.py b/src/features/empatica_temperature/cr/main.py index b804298c..20a8af85 100644 --- a/src/features/empatica_temperature/cr/main.py +++ b/src/features/empatica_temperature/cr/main.py @@ -45,7 +45,10 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen temperature_intraday_data = pd.read_csv(sensor_data_files["sensor_data"]) requested_intraday_features = provider["FEATURES"] - if provider["WINDOWS"]["COMPUTE"]: + + calc_windows = kwargs.get('calc_windows', False) + + if provider["WINDOWS"]["COMPUTE"] and calc_windows: requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"] else: requested_window_length = None diff --git a/src/features/entry.py b/src/features/entry.py index 2f65f8ad..761235f1 100644 --- a/src/features/entry.py +++ b/src/features/entry.py @@ -7,6 +7,13 @@ provider = snakemake.params["provider"] provider_key = snakemake.params["provider_key"] sensor_key = snakemake.params["sensor_key"] +calc_windows = False +try: + calc_windows = provider["WINDOWS"]["COMPUTE"] +except KeyError: + print("Compute window key not found in config.yaml!") + + if sensor_key == "all_cleaning_individual" or sensor_key == "all_cleaning_overall": # Data cleaning sensor_features = run_provider_cleaning_script(provider, provider_key, sensor_key, sensor_data_files) @@ -14,6 +21,14 @@ else: # Extract sensor features del sensor_data_files["time_segments_labels"] time_segments_file = snakemake.input["time_segments_labels"] - sensor_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file) -sensor_features.to_csv(snakemake.output[0], index=False) \ No newline at end of file + sensor_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=calc_windows) + # Calculation over multiple windows in case of Empatica's CR-features + if calc_windows: + sensor_features.to_csv(snakemake.output[1], index=False) + sensor_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=False) + elif "empatica" in sensor_key and provider_key == "dbdp": + pd.DataFrame().to_csv(snakemake.output[1], index=False) + + +sensor_features.to_csv(snakemake.output[0], index=False) diff --git a/src/features/utils/utils.py b/src/features/utils/utils.py index d2986699..063afc9d 100644 --- a/src/features/utils/utils.py +++ b/src/features/utils/utils.py @@ -88,7 +88,7 @@ def chunk_episodes(sensor_episodes): return merged_sensor_episodes -def fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file): +def fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=False): import pandas as pd from importlib import import_module, util @@ -106,7 +106,7 @@ def fetch_provider_features(provider, provider_key, sensor_key, sensor_data_file time_segments_labels["label"] = [""] for time_segment in time_segments_labels["label"]: print("{} Processing {} {} {}".format(rapids_log_tag, sensor_key, provider_key, time_segment)) - features = feature_function(sensor_data_files, time_segment, provider, filter_data_by_segment=filter_data_by_segment, chunk_episodes=chunk_episodes) + features = feature_function(sensor_data_files, time_segment, provider, filter_data_by_segment=filter_data_by_segment, chunk_episodes=chunk_episodes, calc_windows=calc_windows) if not "local_segment" in features.columns: raise ValueError("The dataframe returned by the " + sensor_key + " provider '" + provider_key + "' is missing the 'local_segment' column added by the 'filter_data_by_segment()' function. Check the provider script is using such function and is not removing 'local_segment' by accident (" + provider["SRC_SCRIPT"] + ")\n The 'local_segment' column is used to index a provider's features (each row corresponds to a different time segment instance (e.g. 2020-01-01, 2020-01-02, 2020-01-03, etc.)") features.columns = ["{}{}".format("" if col.startswith("local_segment") else (sensor_key + "_"+ provider_key + "_"), col) for col in features.columns]