From f371249b99c60bd859e969301561d7c9c187b159 Mon Sep 17 00:00:00 2001 From: primoz Date: Thu, 9 Jun 2022 13:35:15 +0000 Subject: [PATCH] First order features standardization WIP --- config.yaml | 14 +++++++------- data/external/timesegments_periodic.csv | 1 - .../empatica_blood_volume_pulse/cr/main.py | 10 +++++++++- .../empatica_inter_beat_interval/cr/main.py | 11 +++++++++-- src/features/entry.py | 18 +++++++++++++++--- 5 files changed, 40 insertions(+), 14 deletions(-) diff --git a/config.yaml b/config.yaml index c74d544e..2c3b5ab7 100644 --- a/config.yaml +++ b/config.yaml @@ -3,7 +3,7 @@ ######################################################################################################################## # See https://www.rapids.science/latest/setup/configuration/#participant-files -PIDS: [p031] #p01, p02, p03] +PIDS: [p03] #p01, p02, p03] # See https://www.rapids.science/latest/setup/configuration/#automatic-creation-of-participant-files CREATE_PARTICIPANT_FILES: @@ -183,7 +183,7 @@ PHONE_CALLS: CONTAINER: call PROVIDERS: RAPIDS: - COMPUTE: True + COMPUTE: False FEATURES_TYPE: EPISODES # EVENTS or EPISODES CALL_TYPES: [missed, incoming, outgoing] FEATURES: @@ -484,7 +484,7 @@ EMPATICA_ACCELEROMETER: FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"] SRC_SCRIPT: src/features/empatica_accelerometer/dbdp/main.py CR: - COMPUTE: True + COMPUTE: False FEATURES: ["totalMagnitudeBand", "absoluteMeanBand", "varianceBand"] # Acc features WINDOWS: COMPUTE: True @@ -499,7 +499,7 @@ EMPATICA_HEARTRATE: CONTAINER: HR PROVIDERS: DBDP: - COMPUTE: True + COMPUTE: False FEATURES: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr"] SRC_SCRIPT: src/features/empatica_heartrate/dbdp/main.py @@ -512,7 +512,7 @@ EMPATICA_TEMPERATURE: FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"] SRC_SCRIPT: src/features/empatica_temperature/dbdp/main.py CR: - COMPUTE: True + COMPUTE: False FEATURES: ["maximum", "minimum", "meanAbsChange", "longestStrikeAboveMean", "longestStrikeBelowMean", "stdDev", "median", "meanChange", "sumSquared", "squareSumOfComponent", "sumOfSquareComponents"] WINDOWS: @@ -531,14 +531,14 @@ EMPATICA_ELECTRODERMAL_ACTIVITY: FEATURES: ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", "diffminmodeeda", "entropyeda"] SRC_SCRIPT: src/features/empatica_electrodermal_activity/dbdp/main.py CR: - COMPUTE: True + COMPUTE: False FEATURES: ['mean', 'std', 'q25', 'q75', 'qd', 'deriv', 'power', 'numPeaks', 'ratePeaks', 'powerPeaks', 'sumPosDeriv', 'propPosDeriv', 'derivTonic', 'sigTonicDifference', 'freqFeats','maxPeakAmplitudeChangeBefore', 'maxPeakAmplitudeChangeAfter', 'avgPeakAmplitudeChangeBefore', 'avgPeakAmplitudeChangeAfter', 'avgPeakChangeRatio', 'maxPeakIncreaseTime', 'maxPeakDecreaseTime', 'maxPeakDuration', 'maxPeakChangeRatio', 'avgPeakIncreaseTime', 'avgPeakDecreaseTime', 'avgPeakDuration', 'signalOverallChange', 'changeDuration', 'changeRate', 'significantIncrease', 'significantDecrease'] WINDOWS: - COMPUTE: True + COMPUTE: False WINDOW_LENGTH: 60 # specify window length in seconds SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'max', 'min'] STANDARDIZE_SO_FEATURES: True diff --git a/data/external/timesegments_periodic.csv b/data/external/timesegments_periodic.csv index a2c4d9ac..117313e8 100644 --- a/data/external/timesegments_periodic.csv +++ b/data/external/timesegments_periodic.csv @@ -1,3 +1,2 @@ label,start_time,length,repeats_on,repeats_value daily,00:00:00,23H 59M 59S,every_day,0 -E4baseline,01:00:00,3H,every_day,0 diff --git a/src/features/empatica_blood_volume_pulse/cr/main.py b/src/features/empatica_blood_volume_pulse/cr/main.py index aa7a4186..9e6db52b 100644 --- a/src/features/empatica_blood_volume_pulse/cr/main.py +++ b/src/features/empatica_blood_volume_pulse/cr/main.py @@ -1,5 +1,5 @@ import pandas as pd -from scipy.stats import entropy +from sklearn.preprocessing import StandardScaler from cr_features.helper_functions import convert_to2d, hrv_features from cr_features.hrv import extract_hrv_features_2d_wrapper @@ -7,6 +7,8 @@ from cr_features_helper_methods import extract_second_order_features import sys +# pd.set_option('display.max_rows', 1000) +pd.set_option('display.max_columns', None) def extract_bvp_features_from_intraday_data(bvp_intraday_data, features, window_length, time_segment, filter_data_by_segment): bvp_intraday_features = pd.DataFrame(columns=["local_segment"] + features) @@ -64,8 +66,14 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen requested_window_length, time_segment, filter_data_by_segment) if calc_windows: + if provider["WINDOWS"].get("STANDARDIZE_SO_FEATURES", False): + fo_columns = bvp_intraday_features.columns.values[2:] + fo_columns_z_score = [col + "_zscore" for col in fo_columns] + bvp_intraday_features[fo_columns_z_score] = StandardScaler().fit_transform(bvp_intraday_features[fo_columns]) + so_features_names = provider["WINDOWS"]["SECOND_ORDER_FEATURES"] bvp_second_order_features = extract_second_order_features(bvp_intraday_features, so_features_names) + return bvp_intraday_features, bvp_second_order_features return bvp_intraday_features \ No newline at end of file diff --git a/src/features/empatica_inter_beat_interval/cr/main.py b/src/features/empatica_inter_beat_interval/cr/main.py index 0e27a79f..41af4cf1 100644 --- a/src/features/empatica_inter_beat_interval/cr/main.py +++ b/src/features/empatica_inter_beat_interval/cr/main.py @@ -1,4 +1,5 @@ import pandas as pd +from sklearn.preprocessing import StandardScaler import numpy as np from cr_features.helper_functions import convert_ibi_to2d_time, hrv_features @@ -8,8 +9,8 @@ from cr_features_helper_methods import extract_second_order_features import math import sys -pd.set_option('display.max_rows', 1000) -#pd.set_option('display.max_columns', None) +# pd.set_option('display.max_rows', 1000) +pd.set_option('display.max_columns', None) def extract_ibi_features_from_intraday_data(ibi_intraday_data, features, window_length, time_segment, filter_data_by_segment): @@ -69,8 +70,14 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen ibi_intraday_features = extract_ibi_features_from_intraday_data(ibi_intraday_data, intraday_features_to_compute, requested_window_length, time_segment, filter_data_by_segment) if calc_windows: + if provider["WINDOWS"].get("STANDARDIZE_SO_FEATURES", False): + fo_columns = ibi_intraday_features.columns.values[2:] + fo_columns_z_score = [col + "_zscore" for col in fo_columns] + ibi_intraday_features[fo_columns_z_score] = StandardScaler().fit_transform(ibi_intraday_features[fo_columns]) + so_features_names = provider["WINDOWS"]["SECOND_ORDER_FEATURES"] ibi_second_order_features = extract_second_order_features(ibi_intraday_features, so_features_names) + return ibi_intraday_features, ibi_second_order_features diff --git a/src/features/entry.py b/src/features/entry.py index a13a93de..46895c71 100644 --- a/src/features/entry.py +++ b/src/features/entry.py @@ -21,12 +21,24 @@ else: time_segments_file = snakemake.input["time_segments_labels"] if calc_windows: - window_features, second_order_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=True) + window_features, second_order_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=True) + # # Get basic stats from all participant's windows + # fo_means_stds = pd.DataFrame({"mean": window_features.mean(), "median": window_features.median(), "sd": window_features.std(), + # "min": window_features.min(), "max": window_features.max()}) + + # fo_columns = window_features.columns.values[5:] + # fo_columns_z_score = [col + "_zscore" for col in fo_columns] + # window_features[fo_columns_z_score] = StandardScaler().fit_transform(window_features[fo_columns]) + + # print(fo_means_stds) + # Z-score SO features by columns - if provider["WINDOWS"].get("STANDARDIZE_SO_FEATURES", False): - second_order_features[second_order_features.columns[4:]] = StandardScaler().fit_transform(second_order_features[second_order_features.columns[4:]]) + # if provider["WINDOWS"].get("STANDARDIZE_SO_FEATURES", False): + # for indx, fo_mean_std in fo_means_stds.iterrows(): + # print(indx, fo_mean_std) + # sys.exit() window_features.to_csv(snakemake.output[1], index=False) second_order_features.to_csv(snakemake.output[0], index=False)