Add option to calculate features within windows and store them in a CSV file (all sensors).
parent
74cf4ada1c
commit
3c058e4463
14
config.yaml
14
config.yaml
|
@ -481,7 +481,7 @@ EMPATICA_ACCELEROMETER:
|
||||||
FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
|
FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
|
||||||
SRC_SCRIPT: src/features/empatica_accelerometer/dbdp/main.py
|
SRC_SCRIPT: src/features/empatica_accelerometer/dbdp/main.py
|
||||||
CR:
|
CR:
|
||||||
COMPUTE: True
|
COMPUTE: False
|
||||||
FEATURES: ["fqHighestPeakFreqs", "fqHighestPeaks", "fqEnergyFeat", "fqEntropyFeat", "fqHistogramBins","fqAbsMean", "fqSkewness", "fqKurtosis", "fqInterquart", # Freq features
|
FEATURES: ["fqHighestPeakFreqs", "fqHighestPeaks", "fqEnergyFeat", "fqEntropyFeat", "fqHistogramBins","fqAbsMean", "fqSkewness", "fqKurtosis", "fqInterquart", # Freq features
|
||||||
"meanLow", "areaLow", "totalAbsoluteAreaBand", "totalMagnitudeBand", "entropyBand", "skewnessBand", "kurtosisBand",
|
"meanLow", "areaLow", "totalAbsoluteAreaBand", "totalMagnitudeBand", "entropyBand", "skewnessBand", "kurtosisBand",
|
||||||
"postureDistanceLow", "absoluteMeanBand", "absoluteAreaBand", "quartilesBand", "interQuartileRangeBand", "varianceBand",
|
"postureDistanceLow", "absoluteMeanBand", "absoluteAreaBand", "quartilesBand", "interQuartileRangeBand", "varianceBand",
|
||||||
|
@ -510,7 +510,7 @@ EMPATICA_TEMPERATURE:
|
||||||
CONTAINER: TEMP
|
CONTAINER: TEMP
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
DBDP:
|
DBDP:
|
||||||
COMPUTE: False
|
COMPUTE: True
|
||||||
FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"]
|
FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"]
|
||||||
SRC_SCRIPT: src/features/empatica_temperature/dbdp/main.py
|
SRC_SCRIPT: src/features/empatica_temperature/dbdp/main.py
|
||||||
CR:
|
CR:
|
||||||
|
@ -521,7 +521,7 @@ EMPATICA_TEMPERATURE:
|
||||||
"calcMeanCrossingRateAutocorr", "countAboveMeanAutocorr", "sumPer", "sumSquared", "squareSumOfComponent",
|
"calcMeanCrossingRateAutocorr", "countAboveMeanAutocorr", "sumPer", "sumSquared", "squareSumOfComponent",
|
||||||
"sumOfSquareComponents"]
|
"sumOfSquareComponents"]
|
||||||
WINDOWS:
|
WINDOWS:
|
||||||
COMPUTE: False
|
COMPUTE: True
|
||||||
WINDOW_LENGTH: 90 # specify window length in seconds
|
WINDOW_LENGTH: 90 # specify window length in seconds
|
||||||
SRC_SCRIPT: src/features/empatica_temperature/cr/main.py
|
SRC_SCRIPT: src/features/empatica_temperature/cr/main.py
|
||||||
|
|
||||||
|
@ -530,7 +530,7 @@ EMPATICA_ELECTRODERMAL_ACTIVITY:
|
||||||
CONTAINER: EDA
|
CONTAINER: EDA
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
DBDP:
|
DBDP:
|
||||||
COMPUTE: False
|
COMPUTE: True
|
||||||
FEATURES: ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", "diffminmodeeda", "entropyeda"]
|
FEATURES: ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", "diffminmodeeda", "entropyeda"]
|
||||||
SRC_SCRIPT: src/features/empatica_electrodermal_activity/dbdp/main.py
|
SRC_SCRIPT: src/features/empatica_electrodermal_activity/dbdp/main.py
|
||||||
CR:
|
CR:
|
||||||
|
@ -541,7 +541,7 @@ EMPATICA_ELECTRODERMAL_ACTIVITY:
|
||||||
'avgPeakIncreaseTime', 'avgPeakDecreaseTime', 'avgPeakDuration', 'maxPeakResponseSlopeBefore', 'maxPeakResponseSlopeAfter',
|
'avgPeakIncreaseTime', 'avgPeakDecreaseTime', 'avgPeakDuration', 'maxPeakResponseSlopeBefore', 'maxPeakResponseSlopeAfter',
|
||||||
'signalOverallChange', 'changeDuration', 'changeRate', 'significantIncrease', 'significantDecrease']
|
'signalOverallChange', 'changeDuration', 'changeRate', 'significantIncrease', 'significantDecrease']
|
||||||
WINDOWS:
|
WINDOWS:
|
||||||
COMPUTE: False
|
COMPUTE: True
|
||||||
WINDOW_LENGTH: 80 # specify window length in seconds
|
WINDOW_LENGTH: 80 # specify window length in seconds
|
||||||
SRC_SCRIPT: src/features/empatica_electrodermal_activity/cr/main.py
|
SRC_SCRIPT: src/features/empatica_electrodermal_activity/cr/main.py
|
||||||
|
|
||||||
|
@ -550,7 +550,7 @@ EMPATICA_BLOOD_VOLUME_PULSE:
|
||||||
CONTAINER: BVP
|
CONTAINER: BVP
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
DBDP:
|
DBDP:
|
||||||
COMPUTE: False
|
COMPUTE: True
|
||||||
FEATURES: ["fqHighestPeakFreqs", "fqHighestPeaks", "fqEnergyFeat", "fqEntropyFeat", "fqHistogramBins","fqAbsMean", "fqSkewness", "fqKurtosis", "fqInterquart", # Freq features
|
FEATURES: ["fqHighestPeakFreqs", "fqHighestPeaks", "fqEnergyFeat", "fqEntropyFeat", "fqHistogramBins","fqAbsMean", "fqSkewness", "fqKurtosis", "fqInterquart", # Freq features
|
||||||
"maxbvp", "minbvp", "avgbvp", "medianbvp", "modebvp", "stdbvp", "diffmaxmodebvp", "diffminmodebvp", "entropybvp"] # HRV features
|
"maxbvp", "minbvp", "avgbvp", "medianbvp", "modebvp", "stdbvp", "diffmaxmodebvp", "diffminmodebvp", "entropybvp"] # HRV features
|
||||||
SRC_SCRIPT: src/features/empatica_blood_volume_pulse/dbdp/main.py
|
SRC_SCRIPT: src/features/empatica_blood_volume_pulse/dbdp/main.py
|
||||||
|
@ -558,7 +558,7 @@ EMPATICA_BLOOD_VOLUME_PULSE:
|
||||||
COMPUTE: True
|
COMPUTE: True
|
||||||
FEATURES: ['meanHr', 'ibi', 'sdnn', 'sdsd', 'rmssd', 'pnn20', 'pnn50', 'sd', 'sd2', 'sd1/sd2', 'numRR']
|
FEATURES: ['meanHr', 'ibi', 'sdnn', 'sdsd', 'rmssd', 'pnn20', 'pnn50', 'sd', 'sd2', 'sd1/sd2', 'numRR']
|
||||||
WINDOWS:
|
WINDOWS:
|
||||||
COMPUTE: False
|
COMPUTE: True
|
||||||
WINDOW_LENGTH: 4 # specify window length in seconds
|
WINDOW_LENGTH: 4 # specify window length in seconds
|
||||||
SRC_SCRIPT: src/features/empatica_blood_volume_pulse/cr/main.py
|
SRC_SCRIPT: src/features/empatica_blood_volume_pulse/cr/main.py
|
||||||
|
|
||||||
|
|
|
@ -770,7 +770,8 @@ rule empatica_accelerometer_python_features:
|
||||||
provider_key = "{provider_key}",
|
provider_key = "{provider_key}",
|
||||||
sensor_key = "empatica_accelerometer"
|
sensor_key = "empatica_accelerometer"
|
||||||
output:
|
output:
|
||||||
"data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_python_{provider_key}.csv"
|
"data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_python_{provider_key}.csv",
|
||||||
|
"data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_python_{provider_key}_windows.csv"
|
||||||
script:
|
script:
|
||||||
"../src/features/entry.py"
|
"../src/features/entry.py"
|
||||||
|
|
||||||
|
@ -796,7 +797,8 @@ rule empatica_heartrate_python_features:
|
||||||
provider_key = "{provider_key}",
|
provider_key = "{provider_key}",
|
||||||
sensor_key = "empatica_heartrate"
|
sensor_key = "empatica_heartrate"
|
||||||
output:
|
output:
|
||||||
"data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_python_{provider_key}.csv"
|
"data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_python_{provider_key}.csv",
|
||||||
|
"data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_python_{provider_key}_windows.csv"
|
||||||
script:
|
script:
|
||||||
"../src/features/entry.py"
|
"../src/features/entry.py"
|
||||||
|
|
||||||
|
@ -822,7 +824,8 @@ rule empatica_temperature_python_features:
|
||||||
provider_key = "{provider_key}",
|
provider_key = "{provider_key}",
|
||||||
sensor_key = "empatica_temperature"
|
sensor_key = "empatica_temperature"
|
||||||
output:
|
output:
|
||||||
"data/interim/{pid}/empatica_temperature_features/empatica_temperature_python_{provider_key}.csv"
|
"data/interim/{pid}/empatica_temperature_features/empatica_temperature_python_{provider_key}.csv",
|
||||||
|
"data/interim/{pid}/empatica_temperature_features/empatica_temperature_python_{provider_key}_windows.csv"
|
||||||
script:
|
script:
|
||||||
"../src/features/entry.py"
|
"../src/features/entry.py"
|
||||||
|
|
||||||
|
@ -848,7 +851,8 @@ rule empatica_electrodermal_activity_python_features:
|
||||||
provider_key = "{provider_key}",
|
provider_key = "{provider_key}",
|
||||||
sensor_key = "empatica_electrodermal_activity"
|
sensor_key = "empatica_electrodermal_activity"
|
||||||
output:
|
output:
|
||||||
"data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_{provider_key}.csv"
|
"data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_{provider_key}.csv",
|
||||||
|
"data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_{provider_key}_windows.csv"
|
||||||
script:
|
script:
|
||||||
"../src/features/entry.py"
|
"../src/features/entry.py"
|
||||||
|
|
||||||
|
@ -874,7 +878,8 @@ rule empatica_blood_volume_pulse_python_features:
|
||||||
provider_key = "{provider_key}",
|
provider_key = "{provider_key}",
|
||||||
sensor_key = "empatica_blood_volume_pulse"
|
sensor_key = "empatica_blood_volume_pulse"
|
||||||
output:
|
output:
|
||||||
"data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}.csv"
|
"data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}.csv",
|
||||||
|
"data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}_windows.csv"
|
||||||
script:
|
script:
|
||||||
"../src/features/entry.py"
|
"../src/features/entry.py"
|
||||||
|
|
||||||
|
@ -900,7 +905,8 @@ rule empatica_inter_beat_interval_python_features:
|
||||||
provider_key = "{provider_key}",
|
provider_key = "{provider_key}",
|
||||||
sensor_key = "empatica_inter_beat_interval"
|
sensor_key = "empatica_inter_beat_interval"
|
||||||
output:
|
output:
|
||||||
"data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_{provider_key}.csv"
|
"data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_{provider_key}.csv",
|
||||||
|
"data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_{provider_key}_windows.csv"
|
||||||
script:
|
script:
|
||||||
"../src/features/entry.py"
|
"../src/features/entry.py"
|
||||||
|
|
||||||
|
|
|
@ -53,7 +53,9 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen
|
||||||
|
|
||||||
requested_intraday_features = provider["FEATURES"]
|
requested_intraday_features = provider["FEATURES"]
|
||||||
|
|
||||||
if provider["WINDOWS"]["COMPUTE"]:
|
calc_windows = kwargs.get('calc_windows', False)
|
||||||
|
|
||||||
|
if provider["WINDOWS"]["COMPUTE"] and calc_windows:
|
||||||
requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"]
|
requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"]
|
||||||
else:
|
else:
|
||||||
requested_window_length = None
|
requested_window_length = None
|
||||||
|
|
|
@ -47,7 +47,9 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen
|
||||||
|
|
||||||
requested_intraday_features = provider["FEATURES"]
|
requested_intraday_features = provider["FEATURES"]
|
||||||
|
|
||||||
if provider["WINDOWS"]["COMPUTE"]:
|
calc_windows = kwargs.get('calc_windows', False)
|
||||||
|
|
||||||
|
if provider["WINDOWS"]["COMPUTE"] and calc_windows:
|
||||||
requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"]
|
requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"]
|
||||||
else:
|
else:
|
||||||
requested_window_length = None
|
requested_window_length = None
|
||||||
|
|
|
@ -46,7 +46,9 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen
|
||||||
|
|
||||||
requested_intraday_features = provider["FEATURES"]
|
requested_intraday_features = provider["FEATURES"]
|
||||||
|
|
||||||
if provider["WINDOWS"]["COMPUTE"]:
|
calc_windows = kwargs.get('calc_windows', False)
|
||||||
|
|
||||||
|
if provider["WINDOWS"]["COMPUTE"] and calc_windows:
|
||||||
requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"]
|
requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"]
|
||||||
else:
|
else:
|
||||||
requested_window_length = None
|
requested_window_length = None
|
||||||
|
|
|
@ -45,7 +45,10 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen
|
||||||
temperature_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])
|
temperature_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])
|
||||||
|
|
||||||
requested_intraday_features = provider["FEATURES"]
|
requested_intraday_features = provider["FEATURES"]
|
||||||
if provider["WINDOWS"]["COMPUTE"]:
|
|
||||||
|
calc_windows = kwargs.get('calc_windows', False)
|
||||||
|
|
||||||
|
if provider["WINDOWS"]["COMPUTE"] and calc_windows:
|
||||||
requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"]
|
requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"]
|
||||||
else:
|
else:
|
||||||
requested_window_length = None
|
requested_window_length = None
|
||||||
|
|
|
@ -7,6 +7,13 @@ provider = snakemake.params["provider"]
|
||||||
provider_key = snakemake.params["provider_key"]
|
provider_key = snakemake.params["provider_key"]
|
||||||
sensor_key = snakemake.params["sensor_key"]
|
sensor_key = snakemake.params["sensor_key"]
|
||||||
|
|
||||||
|
calc_windows = False
|
||||||
|
try:
|
||||||
|
calc_windows = provider["WINDOWS"]["COMPUTE"]
|
||||||
|
except KeyError:
|
||||||
|
print("Compute window key not found in config.yaml!")
|
||||||
|
|
||||||
|
|
||||||
if sensor_key == "all_cleaning_individual" or sensor_key == "all_cleaning_overall":
|
if sensor_key == "all_cleaning_individual" or sensor_key == "all_cleaning_overall":
|
||||||
# Data cleaning
|
# Data cleaning
|
||||||
sensor_features = run_provider_cleaning_script(provider, provider_key, sensor_key, sensor_data_files)
|
sensor_features = run_provider_cleaning_script(provider, provider_key, sensor_key, sensor_data_files)
|
||||||
|
@ -14,6 +21,14 @@ else:
|
||||||
# Extract sensor features
|
# Extract sensor features
|
||||||
del sensor_data_files["time_segments_labels"]
|
del sensor_data_files["time_segments_labels"]
|
||||||
time_segments_file = snakemake.input["time_segments_labels"]
|
time_segments_file = snakemake.input["time_segments_labels"]
|
||||||
sensor_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file)
|
|
||||||
|
sensor_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=calc_windows)
|
||||||
|
# Calculation over multiple windows in case of Empatica's CR-features
|
||||||
|
if calc_windows:
|
||||||
|
sensor_features.to_csv(snakemake.output[1], index=False)
|
||||||
|
sensor_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=False)
|
||||||
|
elif "empatica" in sensor_key and provider_key == "dbdp":
|
||||||
|
pd.DataFrame().to_csv(snakemake.output[1], index=False)
|
||||||
|
|
||||||
|
|
||||||
sensor_features.to_csv(snakemake.output[0], index=False)
|
sensor_features.to_csv(snakemake.output[0], index=False)
|
|
@ -88,7 +88,7 @@ def chunk_episodes(sensor_episodes):
|
||||||
|
|
||||||
return merged_sensor_episodes
|
return merged_sensor_episodes
|
||||||
|
|
||||||
def fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file):
|
def fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=False):
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from importlib import import_module, util
|
from importlib import import_module, util
|
||||||
|
|
||||||
|
@ -106,7 +106,7 @@ def fetch_provider_features(provider, provider_key, sensor_key, sensor_data_file
|
||||||
time_segments_labels["label"] = [""]
|
time_segments_labels["label"] = [""]
|
||||||
for time_segment in time_segments_labels["label"]:
|
for time_segment in time_segments_labels["label"]:
|
||||||
print("{} Processing {} {} {}".format(rapids_log_tag, sensor_key, provider_key, time_segment))
|
print("{} Processing {} {} {}".format(rapids_log_tag, sensor_key, provider_key, time_segment))
|
||||||
features = feature_function(sensor_data_files, time_segment, provider, filter_data_by_segment=filter_data_by_segment, chunk_episodes=chunk_episodes)
|
features = feature_function(sensor_data_files, time_segment, provider, filter_data_by_segment=filter_data_by_segment, chunk_episodes=chunk_episodes, calc_windows=calc_windows)
|
||||||
if not "local_segment" in features.columns:
|
if not "local_segment" in features.columns:
|
||||||
raise ValueError("The dataframe returned by the " + sensor_key + " provider '" + provider_key + "' is missing the 'local_segment' column added by the 'filter_data_by_segment()' function. Check the provider script is using such function and is not removing 'local_segment' by accident (" + provider["SRC_SCRIPT"] + ")\n The 'local_segment' column is used to index a provider's features (each row corresponds to a different time segment instance (e.g. 2020-01-01, 2020-01-02, 2020-01-03, etc.)")
|
raise ValueError("The dataframe returned by the " + sensor_key + " provider '" + provider_key + "' is missing the 'local_segment' column added by the 'filter_data_by_segment()' function. Check the provider script is using such function and is not removing 'local_segment' by accident (" + provider["SRC_SCRIPT"] + ")\n The 'local_segment' column is used to index a provider's features (each row corresponds to a different time segment instance (e.g. 2020-01-01, 2020-01-02, 2020-01-03, etc.)")
|
||||||
features.columns = ["{}{}".format("" if col.startswith("local_segment") else (sensor_key + "_"+ provider_key + "_"), col) for col in features.columns]
|
features.columns = ["{}{}".format("" if col.startswith("local_segment") else (sensor_key + "_"+ provider_key + "_"), col) for col in features.columns]
|
||||||
|
|
Loading…
Reference in New Issue