Extraction of additional second-order (SO) features. Min/max have been replaced by nsmallest/nlargest means.
parent
f371249b99
commit
e1d7607de4
42
config.yaml
42
config.yaml
|
@ -484,13 +484,13 @@ EMPATICA_ACCELEROMETER:
|
||||||
FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
|
FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
|
||||||
SRC_SCRIPT: src/features/empatica_accelerometer/dbdp/main.py
|
SRC_SCRIPT: src/features/empatica_accelerometer/dbdp/main.py
|
||||||
CR:
|
CR:
|
||||||
COMPUTE: False
|
COMPUTE: True
|
||||||
FEATURES: ["totalMagnitudeBand", "absoluteMeanBand", "varianceBand"] # Acc features
|
FEATURES: ["totalMagnitudeBand", "absoluteMeanBand", "varianceBand"] # Acc features
|
||||||
WINDOWS:
|
WINDOWS:
|
||||||
COMPUTE: True
|
COMPUTE: True
|
||||||
WINDOW_LENGTH: 15 # specify window length in seconds
|
WINDOW_LENGTH: 15 # specify window length in seconds
|
||||||
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'max', 'min']
|
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'nlargest_mean', 'nsmallest_mean', 'count_windows']
|
||||||
STANDARDIZE_SO_FEATURES: True
|
STANDARDIZE_FEATURES: False
|
||||||
SRC_SCRIPT: src/features/empatica_accelerometer/cr/main.py
|
SRC_SCRIPT: src/features/empatica_accelerometer/cr/main.py
|
||||||
|
|
||||||
|
|
||||||
|
@ -512,14 +512,14 @@ EMPATICA_TEMPERATURE:
|
||||||
FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"]
|
FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"]
|
||||||
SRC_SCRIPT: src/features/empatica_temperature/dbdp/main.py
|
SRC_SCRIPT: src/features/empatica_temperature/dbdp/main.py
|
||||||
CR:
|
CR:
|
||||||
COMPUTE: False
|
COMPUTE: True
|
||||||
FEATURES: ["maximum", "minimum", "meanAbsChange", "longestStrikeAboveMean", "longestStrikeBelowMean",
|
FEATURES: ["maximum", "minimum", "meanAbsChange", "longestStrikeAboveMean", "longestStrikeBelowMean",
|
||||||
"stdDev", "median", "meanChange", "sumSquared", "squareSumOfComponent", "sumOfSquareComponents"]
|
"stdDev", "median", "meanChange", "sumSquared", "squareSumOfComponent", "sumOfSquareComponents"]
|
||||||
WINDOWS:
|
WINDOWS:
|
||||||
COMPUTE: True
|
COMPUTE: True
|
||||||
WINDOW_LENGTH: 300 # specify window length in seconds
|
WINDOW_LENGTH: 300 # specify window length in seconds
|
||||||
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'max', 'min']
|
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'nlargest_mean', 'nsmallest_mean', 'count_windows']
|
||||||
STANDARDIZE_SO_FEATURES: True
|
STANDARDIZE_FEATURES: False
|
||||||
SRC_SCRIPT: src/features/empatica_temperature/cr/main.py
|
SRC_SCRIPT: src/features/empatica_temperature/cr/main.py
|
||||||
|
|
||||||
# See https://www.rapids.science/latest/features/empatica-electrodermal-activity/
|
# See https://www.rapids.science/latest/features/empatica-electrodermal-activity/
|
||||||
|
@ -531,17 +531,17 @@ EMPATICA_ELECTRODERMAL_ACTIVITY:
|
||||||
FEATURES: ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", "diffminmodeeda", "entropyeda"]
|
FEATURES: ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", "diffminmodeeda", "entropyeda"]
|
||||||
SRC_SCRIPT: src/features/empatica_electrodermal_activity/dbdp/main.py
|
SRC_SCRIPT: src/features/empatica_electrodermal_activity/dbdp/main.py
|
||||||
CR:
|
CR:
|
||||||
COMPUTE: False
|
COMPUTE: True
|
||||||
FEATURES: ['mean', 'std', 'q25', 'q75', 'qd', 'deriv', 'power', 'numPeaks', 'ratePeaks', 'powerPeaks', 'sumPosDeriv', 'propPosDeriv', 'derivTonic',
|
FEATURES: ['mean', 'std', 'q25', 'q75', 'qd', 'deriv', 'power', 'numPeaks', 'ratePeaks', 'powerPeaks', 'sumPosDeriv', 'propPosDeriv', 'derivTonic',
|
||||||
'sigTonicDifference', 'freqFeats','maxPeakAmplitudeChangeBefore', 'maxPeakAmplitudeChangeAfter', 'avgPeakAmplitudeChangeBefore',
|
'sigTonicDifference', 'freqFeats','maxPeakAmplitudeChangeBefore', 'maxPeakAmplitudeChangeAfter', 'avgPeakAmplitudeChangeBefore',
|
||||||
'avgPeakAmplitudeChangeAfter', 'avgPeakChangeRatio', 'maxPeakIncreaseTime', 'maxPeakDecreaseTime', 'maxPeakDuration', 'maxPeakChangeRatio',
|
'avgPeakAmplitudeChangeAfter', 'avgPeakChangeRatio', 'maxPeakIncreaseTime', 'maxPeakDecreaseTime', 'maxPeakDuration', 'maxPeakChangeRatio',
|
||||||
'avgPeakIncreaseTime', 'avgPeakDecreaseTime', 'avgPeakDuration', 'signalOverallChange', 'changeDuration', 'changeRate', 'significantIncrease',
|
'avgPeakIncreaseTime', 'avgPeakDecreaseTime', 'avgPeakDuration', 'signalOverallChange', 'changeDuration', 'changeRate', 'significantIncrease',
|
||||||
'significantDecrease']
|
'significantDecrease']
|
||||||
WINDOWS:
|
WINDOWS:
|
||||||
COMPUTE: False
|
COMPUTE: True
|
||||||
WINDOW_LENGTH: 60 # specify window length in seconds
|
WINDOW_LENGTH: 60 # specify window length in seconds
|
||||||
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'max', 'min']
|
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'nlargest_mean', 'nsmallest_mean', count_windows, eda_num_peaks_non_zero]
|
||||||
STANDARDIZE_SO_FEATURES: True
|
STANDARDIZE_FEATURES: False
|
||||||
SRC_SCRIPT: src/features/empatica_electrodermal_activity/cr/main.py
|
SRC_SCRIPT: src/features/empatica_electrodermal_activity/cr/main.py
|
||||||
|
|
||||||
# See https://www.rapids.science/latest/features/empatica-blood-volume-pulse/
|
# See https://www.rapids.science/latest/features/empatica-blood-volume-pulse/
|
||||||
|
@ -559,8 +559,8 @@ EMPATICA_BLOOD_VOLUME_PULSE:
|
||||||
WINDOWS:
|
WINDOWS:
|
||||||
COMPUTE: True
|
COMPUTE: True
|
||||||
WINDOW_LENGTH: 300 # specify window length in seconds
|
WINDOW_LENGTH: 300 # specify window length in seconds
|
||||||
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'max', 'min']
|
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'nlargest_mean', 'nsmallest_mean', 'count_windows']
|
||||||
STANDARDIZE_SO_FEATURES: True
|
STANDARDIZE_FEATURES: False
|
||||||
SRC_SCRIPT: src/features/empatica_blood_volume_pulse/cr/main.py
|
SRC_SCRIPT: src/features/empatica_blood_volume_pulse/cr/main.py
|
||||||
|
|
||||||
# See https://www.rapids.science/latest/features/empatica-inter-beat-interval/
|
# See https://www.rapids.science/latest/features/empatica-inter-beat-interval/
|
||||||
|
@ -579,8 +579,8 @@ EMPATICA_INTER_BEAT_INTERVAL:
|
||||||
WINDOWS:
|
WINDOWS:
|
||||||
COMPUTE: True
|
COMPUTE: True
|
||||||
WINDOW_LENGTH: 300 # specify window length in seconds
|
WINDOW_LENGTH: 300 # specify window length in seconds
|
||||||
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'max', 'min']
|
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'nlargest_mean', 'nsmallest_mean', 'count_windows']
|
||||||
STANDARDIZE_SO_FEATURES: True
|
STANDARDIZE_FEATURES: False
|
||||||
SRC_SCRIPT: src/features/empatica_inter_beat_interval/cr/main.py
|
SRC_SCRIPT: src/features/empatica_inter_beat_interval/cr/main.py
|
||||||
|
|
||||||
# See https://www.rapids.science/latest/features/empatica-tags/
|
# See https://www.rapids.science/latest/features/empatica-tags/
|
||||||
|
@ -663,3 +663,17 @@ ALL_CLEANING_OVERALL:
|
||||||
MIN_OVERLAP_FOR_CORR_THRESHOLD: 0.5
|
MIN_OVERLAP_FOR_CORR_THRESHOLD: 0.5
|
||||||
CORR_THRESHOLD: 0.95
|
CORR_THRESHOLD: 0.95
|
||||||
SRC_SCRIPT: src/features/all_cleaning_overall/rapids/main.R
|
SRC_SCRIPT: src/features/all_cleaning_overall/rapids/main.R
|
||||||
|
|
||||||
|
|
||||||
|
########################################################################################################################
|
||||||
|
# Z-score standardization #
|
||||||
|
########################################################################################################################
|
||||||
|
STANDARDIZATION:
|
||||||
|
COMPUTE: True
|
||||||
|
EXCECUTE_FULL_PIPELINE: False # Standardization to be calculated starting from the feature extraction step, including the steps that merge all sensors and participants (in a separate standardization file)
|
||||||
|
EMPATICA_STANDARDIZATION:
|
||||||
|
PROVIDERS:
|
||||||
|
CR:
|
||||||
|
COMPUTE: False
|
||||||
|
TYPE: FROM_FIRST_ORDER # FROM_FIRST_ORDER or FROM_SECOND_ORDER (not implemented)
|
||||||
|
SRC_SCRIPT: src/features/all_cleaning_overall/rapids/main.R
|
||||||
|
|
|
@ -6,16 +6,25 @@ import sys
|
||||||
def extract_second_order_features(intraday_features, so_features_names):
|
def extract_second_order_features(intraday_features, so_features_names):
|
||||||
if not intraday_features.empty:
|
if not intraday_features.empty:
|
||||||
so_features = pd.DataFrame()
|
so_features = pd.DataFrame()
|
||||||
|
#print(intraday_features.drop("level_1", axis=1).groupby(["local_segment"]).nsmallest())
|
||||||
if "mean" in so_features_names:
|
if "mean" in so_features_names:
|
||||||
so_features = pd.concat([so_features, intraday_features.drop("level_1", axis=1).groupby(["local_segment"]).mean().add_suffix("_SO_mean")], axis=1)
|
so_features = pd.concat([so_features, intraday_features.drop("level_1", axis=1).groupby(["local_segment"]).mean().add_suffix("_SO_mean")], axis=1)
|
||||||
if "median" in so_features_names:
|
if "median" in so_features_names:
|
||||||
so_features = pd.concat([so_features, intraday_features.drop("level_1", axis=1).groupby(["local_segment"]).median().add_suffix("_SO_median")], axis=1)
|
so_features = pd.concat([so_features, intraday_features.drop("level_1", axis=1).groupby(["local_segment"]).median().add_suffix("_SO_median")], axis=1)
|
||||||
if "sd" in so_features_names:
|
if "sd" in so_features_names:
|
||||||
so_features = pd.concat([so_features, intraday_features.drop("level_1", axis=1).groupby(["local_segment"]).std().add_suffix("_SO_sd")], axis=1)
|
so_features = pd.concat([so_features, intraday_features.drop("level_1", axis=1).groupby(["local_segment"]).std().add_suffix("_SO_sd")], axis=1)
|
||||||
if "max" in so_features_names:
|
if "nlargest_mean" in so_features_names: # largest 5 -- maybe there is a faster groupby solution?
|
||||||
so_features = pd.concat([so_features, intraday_features.drop("level_1", axis=1).groupby(["local_segment"]).max().add_suffix("_SO_max")], axis=1)
|
for column in intraday_features.columns[2:]:
|
||||||
if "min" in so_features_names:
|
so_features[column+"_SO_nlargest_mean"] = intraday_features.drop("level_1", axis=1).groupby("local_segment")[column].apply(lambda x: x.nlargest(5).mean())
|
||||||
so_features = pd.concat([so_features, intraday_features.drop("level_1", axis=1).groupby(["local_segment"]).min().add_suffix("_SO_min")], axis=1)
|
if "nsmallest_mean" in so_features_names: # smallest 5 -- maybe there is a faster groupby solution?
|
||||||
|
for column in intraday_features.columns[2:]:
|
||||||
|
so_features[column+"_SO_nsmallest_mean"] = intraday_features.drop("level_1", axis=1).groupby("local_segment")[column].apply(lambda x: x.nsmallest(5).mean())
|
||||||
|
if "count_windows" in so_features_names:
|
||||||
|
so_features["SO_windowsCount"] = intraday_features.groupby(["local_segment"]).count()["level_1"]
|
||||||
|
|
||||||
|
# numPeaksNonZero specialized for EDA sensor
|
||||||
|
if "eda_num_peaks_non_zero" in so_features_names and "numPeaks" in intraday_features.columns:
|
||||||
|
so_features["SO_numPeaksNonZero"] = intraday_features.groupby("local_segment")["numPeaks"].apply(lambda x: (x!=0).sum())
|
||||||
|
|
||||||
so_features.reset_index(inplace=True)
|
so_features.reset_index(inplace=True)
|
||||||
|
|
||||||
|
|
|
@ -66,7 +66,7 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen
|
||||||
requested_window_length, time_segment, filter_data_by_segment)
|
requested_window_length, time_segment, filter_data_by_segment)
|
||||||
|
|
||||||
if calc_windows:
|
if calc_windows:
|
||||||
if provider["WINDOWS"].get("STANDARDIZE_SO_FEATURES", False):
|
if provider["WINDOWS"].get("STANDARDIZE_FEATURES", False):
|
||||||
fo_columns = bvp_intraday_features.columns.values[2:]
|
fo_columns = bvp_intraday_features.columns.values[2:]
|
||||||
fo_columns_z_score = [col + "_zscore" for col in fo_columns]
|
fo_columns_z_score = [col + "_zscore" for col in fo_columns]
|
||||||
bvp_intraday_features[fo_columns_z_score] = StandardScaler().fit_transform(bvp_intraday_features[fo_columns])
|
bvp_intraday_features[fo_columns_z_score] = StandardScaler().fit_transform(bvp_intraday_features[fo_columns])
|
||||||
|
|
|
@ -33,7 +33,7 @@ def extract_ibi_features_from_intraday_data(ibi_intraday_data, features, window_
|
||||||
signal_2D = \
|
signal_2D = \
|
||||||
convert_ibi_to2d_time(x[['timings', 'inter_beat_interval']], math.ceil(x['timings'].iloc[-1]))[0],
|
convert_ibi_to2d_time(x[['timings', 'inter_beat_interval']], math.ceil(x['timings'].iloc[-1]))[0],
|
||||||
ibi_timings = \
|
ibi_timings = \
|
||||||
convert_ibi_to2d_time(x[['timings', 'inter_beat_interval']], math.ceil(x['timings'].iloc[-1]))[1],
|
convert_ibi_to2d_time(x[['timings', 'inter_beat_interval']], math.ceil(x['timings'].iloc[-1]))[1],
|
||||||
sampling=None, hampel_fiter=False, median_filter=False, mod_z_score_filter=True, feature_names=features))
|
sampling=None, hampel_fiter=False, median_filter=False, mod_z_score_filter=True, feature_names=features))
|
||||||
else:
|
else:
|
||||||
ibi_intraday_features = \
|
ibi_intraday_features = \
|
||||||
|
@ -70,7 +70,7 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen
|
||||||
ibi_intraday_features = extract_ibi_features_from_intraday_data(ibi_intraday_data, intraday_features_to_compute,
|
ibi_intraday_features = extract_ibi_features_from_intraday_data(ibi_intraday_data, intraday_features_to_compute,
|
||||||
requested_window_length, time_segment, filter_data_by_segment)
|
requested_window_length, time_segment, filter_data_by_segment)
|
||||||
if calc_windows:
|
if calc_windows:
|
||||||
if provider["WINDOWS"].get("STANDARDIZE_SO_FEATURES", False):
|
if provider["WINDOWS"].get("STANDARDIZE_FEATURES", False):
|
||||||
fo_columns = ibi_intraday_features.columns.values[2:]
|
fo_columns = ibi_intraday_features.columns.values[2:]
|
||||||
fo_columns_z_score = [col + "_zscore" for col in fo_columns]
|
fo_columns_z_score = [col + "_zscore" for col in fo_columns]
|
||||||
ibi_intraday_features[fo_columns_z_score] = StandardScaler().fit_transform(ibi_intraday_features[fo_columns])
|
ibi_intraday_features[fo_columns_z_score] = StandardScaler().fit_transform(ibi_intraday_features[fo_columns])
|
||||||
|
|
|
@ -23,22 +23,6 @@ else:
|
||||||
if calc_windows:
|
if calc_windows:
|
||||||
window_features, second_order_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=True)
|
window_features, second_order_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=True)
|
||||||
|
|
||||||
# # Get basic stats from all participant's windows
|
|
||||||
# fo_means_stds = pd.DataFrame({"mean": window_features.mean(), "median": window_features.median(), "sd": window_features.std(),
|
|
||||||
# "min": window_features.min(), "max": window_features.max()})
|
|
||||||
|
|
||||||
# fo_columns = window_features.columns.values[5:]
|
|
||||||
# fo_columns_z_score = [col + "_zscore" for col in fo_columns]
|
|
||||||
# window_features[fo_columns_z_score] = StandardScaler().fit_transform(window_features[fo_columns])
|
|
||||||
|
|
||||||
# print(fo_means_stds)
|
|
||||||
|
|
||||||
# Z-score SO features by columns
|
|
||||||
# if provider["WINDOWS"].get("STANDARDIZE_SO_FEATURES", False):
|
|
||||||
# for indx, fo_mean_std in fo_means_stds.iterrows():
|
|
||||||
# print(indx, fo_mean_std)
|
|
||||||
|
|
||||||
# sys.exit()
|
|
||||||
window_features.to_csv(snakemake.output[1], index=False)
|
window_features.to_csv(snakemake.output[1], index=False)
|
||||||
second_order_features.to_csv(snakemake.output[0], index=False)
|
second_order_features.to_csv(snakemake.output[0], index=False)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
import pandas as pd
|
||||||
|
import seaborn as sns
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
|
||||||
|
# path = "/rapids/data/processed/features/all_participants/all_sensor_features.csv" # all features all participants
|
||||||
|
# path = "/rapids/data/interim/p03/empatica_accelerometer_features/empatica_accelerometer_python_cr_windows.csv"
|
||||||
|
path = "/rapids/data/interim/p031/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_cr_windows.csv"
|
||||||
|
# path = "/rapids/data/interim/p02/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_cr_windows.csv"
|
||||||
|
# path = "/rapids/data/interim/p02/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_cr_windows.csv"
|
||||||
|
# path = "/rapids/data/interim/p02/empatica_temperature_features/empatica_temperature_python_cr_windows.csv"
|
||||||
|
|
||||||
|
df = pd.read_csv(path)
|
||||||
|
print(df)
|
||||||
|
is_NaN = df.isnull()
|
||||||
|
df = df[df["empatica_electrodermal_activity_cr_numPeaks"]]
|
||||||
|
print(df)
|
||||||
|
|
||||||
|
|
||||||
|
# row_has_NaN = is_NaN. any(axis=1)
|
||||||
|
# rows_with_NaN = df[row_has_NaN]
|
||||||
|
# print(rows_with_NaN.size)
|
||||||
|
|
||||||
|
# sns.heatmap(df.isna(), cbar=False)
|
||||||
|
plt.savefig('eda_windows_p03_window_60_thresh_default.png', bbox_inches='tight')
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue