Add an option to calculate features within windows and store them in a CSV file (all sensors).

sociality-task
= 2022-04-13 13:18:23 +00:00
parent 74cf4ada1c
commit 3c058e4463
8 changed files with 51 additions and 21 deletions

View File

@ -481,7 +481,7 @@ EMPATICA_ACCELEROMETER:
FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"] FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
SRC_SCRIPT: src/features/empatica_accelerometer/dbdp/main.py SRC_SCRIPT: src/features/empatica_accelerometer/dbdp/main.py
CR: CR:
COMPUTE: True COMPUTE: False
FEATURES: ["fqHighestPeakFreqs", "fqHighestPeaks", "fqEnergyFeat", "fqEntropyFeat", "fqHistogramBins","fqAbsMean", "fqSkewness", "fqKurtosis", "fqInterquart", # Freq features FEATURES: ["fqHighestPeakFreqs", "fqHighestPeaks", "fqEnergyFeat", "fqEntropyFeat", "fqHistogramBins","fqAbsMean", "fqSkewness", "fqKurtosis", "fqInterquart", # Freq features
"meanLow", "areaLow", "totalAbsoluteAreaBand", "totalMagnitudeBand", "entropyBand", "skewnessBand", "kurtosisBand", "meanLow", "areaLow", "totalAbsoluteAreaBand", "totalMagnitudeBand", "entropyBand", "skewnessBand", "kurtosisBand",
"postureDistanceLow", "absoluteMeanBand", "absoluteAreaBand", "quartilesBand", "interQuartileRangeBand", "varianceBand", "postureDistanceLow", "absoluteMeanBand", "absoluteAreaBand", "quartilesBand", "interQuartileRangeBand", "varianceBand",
@ -510,7 +510,7 @@ EMPATICA_TEMPERATURE:
CONTAINER: TEMP CONTAINER: TEMP
PROVIDERS: PROVIDERS:
DBDP: DBDP:
COMPUTE: False COMPUTE: True
FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"] FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"]
SRC_SCRIPT: src/features/empatica_temperature/dbdp/main.py SRC_SCRIPT: src/features/empatica_temperature/dbdp/main.py
CR: CR:
@ -521,7 +521,7 @@ EMPATICA_TEMPERATURE:
"calcMeanCrossingRateAutocorr", "countAboveMeanAutocorr", "sumPer", "sumSquared", "squareSumOfComponent", "calcMeanCrossingRateAutocorr", "countAboveMeanAutocorr", "sumPer", "sumSquared", "squareSumOfComponent",
"sumOfSquareComponents"] "sumOfSquareComponents"]
WINDOWS: WINDOWS:
COMPUTE: False COMPUTE: True
WINDOW_LENGTH: 90 # specify window length in seconds WINDOW_LENGTH: 90 # specify window length in seconds
SRC_SCRIPT: src/features/empatica_temperature/cr/main.py SRC_SCRIPT: src/features/empatica_temperature/cr/main.py
@ -530,7 +530,7 @@ EMPATICA_ELECTRODERMAL_ACTIVITY:
CONTAINER: EDA CONTAINER: EDA
PROVIDERS: PROVIDERS:
DBDP: DBDP:
COMPUTE: False COMPUTE: True
FEATURES: ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", "diffminmodeeda", "entropyeda"] FEATURES: ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", "diffminmodeeda", "entropyeda"]
SRC_SCRIPT: src/features/empatica_electrodermal_activity/dbdp/main.py SRC_SCRIPT: src/features/empatica_electrodermal_activity/dbdp/main.py
CR: CR:
@ -541,7 +541,7 @@ EMPATICA_ELECTRODERMAL_ACTIVITY:
'avgPeakIncreaseTime', 'avgPeakDecreaseTime', 'avgPeakDuration', 'maxPeakResponseSlopeBefore', 'maxPeakResponseSlopeAfter', 'avgPeakIncreaseTime', 'avgPeakDecreaseTime', 'avgPeakDuration', 'maxPeakResponseSlopeBefore', 'maxPeakResponseSlopeAfter',
'signalOverallChange', 'changeDuration', 'changeRate', 'significantIncrease', 'significantDecrease'] 'signalOverallChange', 'changeDuration', 'changeRate', 'significantIncrease', 'significantDecrease']
WINDOWS: WINDOWS:
COMPUTE: False COMPUTE: True
WINDOW_LENGTH: 80 # specify window length in seconds WINDOW_LENGTH: 80 # specify window length in seconds
SRC_SCRIPT: src/features/empatica_electrodermal_activity/cr/main.py SRC_SCRIPT: src/features/empatica_electrodermal_activity/cr/main.py
@ -550,7 +550,7 @@ EMPATICA_BLOOD_VOLUME_PULSE:
CONTAINER: BVP CONTAINER: BVP
PROVIDERS: PROVIDERS:
DBDP: DBDP:
COMPUTE: False COMPUTE: True
FEATURES: ["fqHighestPeakFreqs", "fqHighestPeaks", "fqEnergyFeat", "fqEntropyFeat", "fqHistogramBins","fqAbsMean", "fqSkewness", "fqKurtosis", "fqInterquart", # Freq features FEATURES: ["fqHighestPeakFreqs", "fqHighestPeaks", "fqEnergyFeat", "fqEntropyFeat", "fqHistogramBins","fqAbsMean", "fqSkewness", "fqKurtosis", "fqInterquart", # Freq features
"maxbvp", "minbvp", "avgbvp", "medianbvp", "modebvp", "stdbvp", "diffmaxmodebvp", "diffminmodebvp", "entropybvp"] # HRV features "maxbvp", "minbvp", "avgbvp", "medianbvp", "modebvp", "stdbvp", "diffmaxmodebvp", "diffminmodebvp", "entropybvp"] # HRV features
SRC_SCRIPT: src/features/empatica_blood_volume_pulse/dbdp/main.py SRC_SCRIPT: src/features/empatica_blood_volume_pulse/dbdp/main.py
@ -558,7 +558,7 @@ EMPATICA_BLOOD_VOLUME_PULSE:
COMPUTE: True COMPUTE: True
FEATURES: ['meanHr', 'ibi', 'sdnn', 'sdsd', 'rmssd', 'pnn20', 'pnn50', 'sd', 'sd2', 'sd1/sd2', 'numRR'] FEATURES: ['meanHr', 'ibi', 'sdnn', 'sdsd', 'rmssd', 'pnn20', 'pnn50', 'sd', 'sd2', 'sd1/sd2', 'numRR']
WINDOWS: WINDOWS:
COMPUTE: False COMPUTE: True
WINDOW_LENGTH: 4 # specify window length in seconds WINDOW_LENGTH: 4 # specify window length in seconds
SRC_SCRIPT: src/features/empatica_blood_volume_pulse/cr/main.py SRC_SCRIPT: src/features/empatica_blood_volume_pulse/cr/main.py

View File

@ -770,7 +770,8 @@ rule empatica_accelerometer_python_features:
provider_key = "{provider_key}", provider_key = "{provider_key}",
sensor_key = "empatica_accelerometer" sensor_key = "empatica_accelerometer"
output: output:
"data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_python_{provider_key}.csv" "data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_python_{provider_key}.csv",
"data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_python_{provider_key}_windows.csv"
script: script:
"../src/features/entry.py" "../src/features/entry.py"
@ -796,7 +797,8 @@ rule empatica_heartrate_python_features:
provider_key = "{provider_key}", provider_key = "{provider_key}",
sensor_key = "empatica_heartrate" sensor_key = "empatica_heartrate"
output: output:
"data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_python_{provider_key}.csv" "data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_python_{provider_key}.csv",
"data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_python_{provider_key}_windows.csv"
script: script:
"../src/features/entry.py" "../src/features/entry.py"
@ -822,7 +824,8 @@ rule empatica_temperature_python_features:
provider_key = "{provider_key}", provider_key = "{provider_key}",
sensor_key = "empatica_temperature" sensor_key = "empatica_temperature"
output: output:
"data/interim/{pid}/empatica_temperature_features/empatica_temperature_python_{provider_key}.csv" "data/interim/{pid}/empatica_temperature_features/empatica_temperature_python_{provider_key}.csv",
"data/interim/{pid}/empatica_temperature_features/empatica_temperature_python_{provider_key}_windows.csv"
script: script:
"../src/features/entry.py" "../src/features/entry.py"
@ -848,7 +851,8 @@ rule empatica_electrodermal_activity_python_features:
provider_key = "{provider_key}", provider_key = "{provider_key}",
sensor_key = "empatica_electrodermal_activity" sensor_key = "empatica_electrodermal_activity"
output: output:
"data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_{provider_key}.csv" "data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_{provider_key}.csv",
"data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_{provider_key}_windows.csv"
script: script:
"../src/features/entry.py" "../src/features/entry.py"
@ -874,7 +878,8 @@ rule empatica_blood_volume_pulse_python_features:
provider_key = "{provider_key}", provider_key = "{provider_key}",
sensor_key = "empatica_blood_volume_pulse" sensor_key = "empatica_blood_volume_pulse"
output: output:
"data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}.csv" "data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}.csv",
"data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}_windows.csv"
script: script:
"../src/features/entry.py" "../src/features/entry.py"
@ -900,7 +905,8 @@ rule empatica_inter_beat_interval_python_features:
provider_key = "{provider_key}", provider_key = "{provider_key}",
sensor_key = "empatica_inter_beat_interval" sensor_key = "empatica_inter_beat_interval"
output: output:
"data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_{provider_key}.csv" "data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_{provider_key}.csv",
"data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_{provider_key}_windows.csv"
script: script:
"../src/features/entry.py" "../src/features/entry.py"

View File

@ -53,7 +53,9 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen
requested_intraday_features = provider["FEATURES"] requested_intraday_features = provider["FEATURES"]
if provider["WINDOWS"]["COMPUTE"]: calc_windows = kwargs.get('calc_windows', False)
if provider["WINDOWS"]["COMPUTE"] and calc_windows:
requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"] requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"]
else: else:
requested_window_length = None requested_window_length = None

View File

@ -47,7 +47,9 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen
requested_intraday_features = provider["FEATURES"] requested_intraday_features = provider["FEATURES"]
if provider["WINDOWS"]["COMPUTE"]: calc_windows = kwargs.get('calc_windows', False)
if provider["WINDOWS"]["COMPUTE"] and calc_windows:
requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"] requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"]
else: else:
requested_window_length = None requested_window_length = None

View File

@ -46,7 +46,9 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen
requested_intraday_features = provider["FEATURES"] requested_intraday_features = provider["FEATURES"]
if provider["WINDOWS"]["COMPUTE"]: calc_windows = kwargs.get('calc_windows', False)
if provider["WINDOWS"]["COMPUTE"] and calc_windows:
requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"] requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"]
else: else:
requested_window_length = None requested_window_length = None

View File

@ -45,7 +45,10 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen
temperature_intraday_data = pd.read_csv(sensor_data_files["sensor_data"]) temperature_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])
requested_intraday_features = provider["FEATURES"] requested_intraday_features = provider["FEATURES"]
if provider["WINDOWS"]["COMPUTE"]:
calc_windows = kwargs.get('calc_windows', False)
if provider["WINDOWS"]["COMPUTE"] and calc_windows:
requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"] requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"]
else: else:
requested_window_length = None requested_window_length = None

View File

@ -7,6 +7,13 @@ provider = snakemake.params["provider"]
provider_key = snakemake.params["provider_key"] provider_key = snakemake.params["provider_key"]
sensor_key = snakemake.params["sensor_key"] sensor_key = snakemake.params["sensor_key"]
calc_windows = False
try:
calc_windows = provider["WINDOWS"]["COMPUTE"]
except KeyError:
print("Compute window key not found in config.yaml!")
if sensor_key == "all_cleaning_individual" or sensor_key == "all_cleaning_overall": if sensor_key == "all_cleaning_individual" or sensor_key == "all_cleaning_overall":
# Data cleaning # Data cleaning
sensor_features = run_provider_cleaning_script(provider, provider_key, sensor_key, sensor_data_files) sensor_features = run_provider_cleaning_script(provider, provider_key, sensor_key, sensor_data_files)
@ -14,6 +21,14 @@ else:
# Extract sensor features # Extract sensor features
del sensor_data_files["time_segments_labels"] del sensor_data_files["time_segments_labels"]
time_segments_file = snakemake.input["time_segments_labels"] time_segments_file = snakemake.input["time_segments_labels"]
sensor_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file)
sensor_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=calc_windows)
# Calculation over multiple windows in case of Empatica's CR-features
if calc_windows:
sensor_features.to_csv(snakemake.output[1], index=False)
sensor_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=False)
elif "empatica" in sensor_key and provider_key == "dbdp":
pd.DataFrame().to_csv(snakemake.output[1], index=False)
sensor_features.to_csv(snakemake.output[0], index=False) sensor_features.to_csv(snakemake.output[0], index=False)

View File

@ -88,7 +88,7 @@ def chunk_episodes(sensor_episodes):
return merged_sensor_episodes return merged_sensor_episodes
def fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file): def fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=False):
import pandas as pd import pandas as pd
from importlib import import_module, util from importlib import import_module, util
@ -106,7 +106,7 @@ def fetch_provider_features(provider, provider_key, sensor_key, sensor_data_file
time_segments_labels["label"] = [""] time_segments_labels["label"] = [""]
for time_segment in time_segments_labels["label"]: for time_segment in time_segments_labels["label"]:
print("{} Processing {} {} {}".format(rapids_log_tag, sensor_key, provider_key, time_segment)) print("{} Processing {} {} {}".format(rapids_log_tag, sensor_key, provider_key, time_segment))
features = feature_function(sensor_data_files, time_segment, provider, filter_data_by_segment=filter_data_by_segment, chunk_episodes=chunk_episodes) features = feature_function(sensor_data_files, time_segment, provider, filter_data_by_segment=filter_data_by_segment, chunk_episodes=chunk_episodes, calc_windows=calc_windows)
if not "local_segment" in features.columns: if not "local_segment" in features.columns:
raise ValueError("The dataframe returned by the " + sensor_key + " provider '" + provider_key + "' is missing the 'local_segment' column added by the 'filter_data_by_segment()' function. Check the provider script is using such function and is not removing 'local_segment' by accident (" + provider["SRC_SCRIPT"] + ")\n The 'local_segment' column is used to index a provider's features (each row corresponds to a different time segment instance (e.g. 2020-01-01, 2020-01-02, 2020-01-03, etc.)") raise ValueError("The dataframe returned by the " + sensor_key + " provider '" + provider_key + "' is missing the 'local_segment' column added by the 'filter_data_by_segment()' function. Check the provider script is using such function and is not removing 'local_segment' by accident (" + provider["SRC_SCRIPT"] + ")\n The 'local_segment' column is used to index a provider's features (each row corresponds to a different time segment instance (e.g. 2020-01-01, 2020-01-02, 2020-01-03, etc.)")
features.columns = ["{}{}".format("" if col.startswith("local_segment") else (sensor_key + "_"+ provider_key + "_"), col) for col in features.columns] features.columns = ["{}{}".format("" if col.startswith("local_segment") else (sensor_key + "_"+ provider_key + "_"), col) for col in features.columns]