First order features standardization WIP
parent
64e41cfa35
commit
f371249b99
14
config.yaml
14
config.yaml
|
@ -3,7 +3,7 @@
|
||||||
########################################################################################################################
|
########################################################################################################################
|
||||||
|
|
||||||
# See https://www.rapids.science/latest/setup/configuration/#participant-files
|
# See https://www.rapids.science/latest/setup/configuration/#participant-files
|
||||||
PIDS: [p031] #p01, p02, p03]
|
PIDS: [p03] # [p01, p02, p03]
|
||||||
|
|
||||||
# See https://www.rapids.science/latest/setup/configuration/#automatic-creation-of-participant-files
|
# See https://www.rapids.science/latest/setup/configuration/#automatic-creation-of-participant-files
|
||||||
CREATE_PARTICIPANT_FILES:
|
CREATE_PARTICIPANT_FILES:
|
||||||
|
@ -183,7 +183,7 @@ PHONE_CALLS:
|
||||||
CONTAINER: call
|
CONTAINER: call
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
RAPIDS:
|
RAPIDS:
|
||||||
COMPUTE: True
|
COMPUTE: False
|
||||||
FEATURES_TYPE: EPISODES # EVENTS or EPISODES
|
FEATURES_TYPE: EPISODES # EVENTS or EPISODES
|
||||||
CALL_TYPES: [missed, incoming, outgoing]
|
CALL_TYPES: [missed, incoming, outgoing]
|
||||||
FEATURES:
|
FEATURES:
|
||||||
|
@ -484,7 +484,7 @@ EMPATICA_ACCELEROMETER:
|
||||||
FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
|
FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
|
||||||
SRC_SCRIPT: src/features/empatica_accelerometer/dbdp/main.py
|
SRC_SCRIPT: src/features/empatica_accelerometer/dbdp/main.py
|
||||||
CR:
|
CR:
|
||||||
COMPUTE: True
|
COMPUTE: False
|
||||||
FEATURES: ["totalMagnitudeBand", "absoluteMeanBand", "varianceBand"] # Acc features
|
FEATURES: ["totalMagnitudeBand", "absoluteMeanBand", "varianceBand"] # Acc features
|
||||||
WINDOWS:
|
WINDOWS:
|
||||||
COMPUTE: True
|
COMPUTE: True
|
||||||
|
@ -499,7 +499,7 @@ EMPATICA_HEARTRATE:
|
||||||
CONTAINER: HR
|
CONTAINER: HR
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
DBDP:
|
DBDP:
|
||||||
COMPUTE: True
|
COMPUTE: False
|
||||||
FEATURES: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr"]
|
FEATURES: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr"]
|
||||||
SRC_SCRIPT: src/features/empatica_heartrate/dbdp/main.py
|
SRC_SCRIPT: src/features/empatica_heartrate/dbdp/main.py
|
||||||
|
|
||||||
|
@ -512,7 +512,7 @@ EMPATICA_TEMPERATURE:
|
||||||
FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"]
|
FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"]
|
||||||
SRC_SCRIPT: src/features/empatica_temperature/dbdp/main.py
|
SRC_SCRIPT: src/features/empatica_temperature/dbdp/main.py
|
||||||
CR:
|
CR:
|
||||||
COMPUTE: True
|
COMPUTE: False
|
||||||
FEATURES: ["maximum", "minimum", "meanAbsChange", "longestStrikeAboveMean", "longestStrikeBelowMean",
|
FEATURES: ["maximum", "minimum", "meanAbsChange", "longestStrikeAboveMean", "longestStrikeBelowMean",
|
||||||
"stdDev", "median", "meanChange", "sumSquared", "squareSumOfComponent", "sumOfSquareComponents"]
|
"stdDev", "median", "meanChange", "sumSquared", "squareSumOfComponent", "sumOfSquareComponents"]
|
||||||
WINDOWS:
|
WINDOWS:
|
||||||
|
@ -531,14 +531,14 @@ EMPATICA_ELECTRODERMAL_ACTIVITY:
|
||||||
FEATURES: ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", "diffminmodeeda", "entropyeda"]
|
FEATURES: ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", "diffminmodeeda", "entropyeda"]
|
||||||
SRC_SCRIPT: src/features/empatica_electrodermal_activity/dbdp/main.py
|
SRC_SCRIPT: src/features/empatica_electrodermal_activity/dbdp/main.py
|
||||||
CR:
|
CR:
|
||||||
COMPUTE: True
|
COMPUTE: False
|
||||||
FEATURES: ['mean', 'std', 'q25', 'q75', 'qd', 'deriv', 'power', 'numPeaks', 'ratePeaks', 'powerPeaks', 'sumPosDeriv', 'propPosDeriv', 'derivTonic',
|
FEATURES: ['mean', 'std', 'q25', 'q75', 'qd', 'deriv', 'power', 'numPeaks', 'ratePeaks', 'powerPeaks', 'sumPosDeriv', 'propPosDeriv', 'derivTonic',
|
||||||
'sigTonicDifference', 'freqFeats','maxPeakAmplitudeChangeBefore', 'maxPeakAmplitudeChangeAfter', 'avgPeakAmplitudeChangeBefore',
|
'sigTonicDifference', 'freqFeats','maxPeakAmplitudeChangeBefore', 'maxPeakAmplitudeChangeAfter', 'avgPeakAmplitudeChangeBefore',
|
||||||
'avgPeakAmplitudeChangeAfter', 'avgPeakChangeRatio', 'maxPeakIncreaseTime', 'maxPeakDecreaseTime', 'maxPeakDuration', 'maxPeakChangeRatio',
|
'avgPeakAmplitudeChangeAfter', 'avgPeakChangeRatio', 'maxPeakIncreaseTime', 'maxPeakDecreaseTime', 'maxPeakDuration', 'maxPeakChangeRatio',
|
||||||
'avgPeakIncreaseTime', 'avgPeakDecreaseTime', 'avgPeakDuration', 'signalOverallChange', 'changeDuration', 'changeRate', 'significantIncrease',
|
'avgPeakIncreaseTime', 'avgPeakDecreaseTime', 'avgPeakDuration', 'signalOverallChange', 'changeDuration', 'changeRate', 'significantIncrease',
|
||||||
'significantDecrease']
|
'significantDecrease']
|
||||||
WINDOWS:
|
WINDOWS:
|
||||||
COMPUTE: True
|
COMPUTE: False
|
||||||
WINDOW_LENGTH: 60 # specify window length in seconds
|
WINDOW_LENGTH: 60 # specify window length in seconds
|
||||||
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'max', 'min']
|
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'max', 'min']
|
||||||
STANDARDIZE_SO_FEATURES: True
|
STANDARDIZE_SO_FEATURES: True
|
||||||
|
|
|
@ -1,3 +1,2 @@
|
||||||
label,start_time,length,repeats_on,repeats_value
|
label,start_time,length,repeats_on,repeats_value
|
||||||
daily,00:00:00,23H 59M 59S,every_day,0
|
daily,00:00:00,23H 59M 59S,every_day,0
|
||||||
E4baseline,01:00:00,3H,every_day,0
|
|
||||||
|
|
|
|
@ -1,5 +1,5 @@
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from scipy.stats import entropy
|
from sklearn.preprocessing import StandardScaler
|
||||||
|
|
||||||
from cr_features.helper_functions import convert_to2d, hrv_features
|
from cr_features.helper_functions import convert_to2d, hrv_features
|
||||||
from cr_features.hrv import extract_hrv_features_2d_wrapper
|
from cr_features.hrv import extract_hrv_features_2d_wrapper
|
||||||
|
@ -7,6 +7,8 @@ from cr_features_helper_methods import extract_second_order_features
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
# pd.set_option('display.max_rows', 1000)
|
||||||
|
pd.set_option('display.max_columns', None)
|
||||||
|
|
||||||
def extract_bvp_features_from_intraday_data(bvp_intraday_data, features, window_length, time_segment, filter_data_by_segment):
|
def extract_bvp_features_from_intraday_data(bvp_intraday_data, features, window_length, time_segment, filter_data_by_segment):
|
||||||
bvp_intraday_features = pd.DataFrame(columns=["local_segment"] + features)
|
bvp_intraday_features = pd.DataFrame(columns=["local_segment"] + features)
|
||||||
|
@ -64,8 +66,14 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen
|
||||||
requested_window_length, time_segment, filter_data_by_segment)
|
requested_window_length, time_segment, filter_data_by_segment)
|
||||||
|
|
||||||
if calc_windows:
|
if calc_windows:
|
||||||
|
if provider["WINDOWS"].get("STANDARDIZE_SO_FEATURES", False):
|
||||||
|
fo_columns = bvp_intraday_features.columns.values[2:]
|
||||||
|
fo_columns_z_score = [col + "_zscore" for col in fo_columns]
|
||||||
|
bvp_intraday_features[fo_columns_z_score] = StandardScaler().fit_transform(bvp_intraday_features[fo_columns])
|
||||||
|
|
||||||
so_features_names = provider["WINDOWS"]["SECOND_ORDER_FEATURES"]
|
so_features_names = provider["WINDOWS"]["SECOND_ORDER_FEATURES"]
|
||||||
bvp_second_order_features = extract_second_order_features(bvp_intraday_features, so_features_names)
|
bvp_second_order_features = extract_second_order_features(bvp_intraday_features, so_features_names)
|
||||||
|
|
||||||
return bvp_intraday_features, bvp_second_order_features
|
return bvp_intraday_features, bvp_second_order_features
|
||||||
|
|
||||||
return bvp_intraday_features
|
return bvp_intraday_features
|
|
@ -1,4 +1,5 @@
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
from sklearn.preprocessing import StandardScaler
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from cr_features.helper_functions import convert_ibi_to2d_time, hrv_features
|
from cr_features.helper_functions import convert_ibi_to2d_time, hrv_features
|
||||||
|
@ -8,8 +9,8 @@ from cr_features_helper_methods import extract_second_order_features
|
||||||
import math
|
import math
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
pd.set_option('display.max_rows', 1000)
|
# pd.set_option('display.max_rows', 1000)
|
||||||
#pd.set_option('display.max_columns', None)
|
pd.set_option('display.max_columns', None)
|
||||||
|
|
||||||
|
|
||||||
def extract_ibi_features_from_intraday_data(ibi_intraday_data, features, window_length, time_segment, filter_data_by_segment):
|
def extract_ibi_features_from_intraday_data(ibi_intraday_data, features, window_length, time_segment, filter_data_by_segment):
|
||||||
|
@ -69,8 +70,14 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen
|
||||||
ibi_intraday_features = extract_ibi_features_from_intraday_data(ibi_intraday_data, intraday_features_to_compute,
|
ibi_intraday_features = extract_ibi_features_from_intraday_data(ibi_intraday_data, intraday_features_to_compute,
|
||||||
requested_window_length, time_segment, filter_data_by_segment)
|
requested_window_length, time_segment, filter_data_by_segment)
|
||||||
if calc_windows:
|
if calc_windows:
|
||||||
|
if provider["WINDOWS"].get("STANDARDIZE_SO_FEATURES", False):
|
||||||
|
fo_columns = ibi_intraday_features.columns.values[2:]
|
||||||
|
fo_columns_z_score = [col + "_zscore" for col in fo_columns]
|
||||||
|
ibi_intraday_features[fo_columns_z_score] = StandardScaler().fit_transform(ibi_intraday_features[fo_columns])
|
||||||
|
|
||||||
so_features_names = provider["WINDOWS"]["SECOND_ORDER_FEATURES"]
|
so_features_names = provider["WINDOWS"]["SECOND_ORDER_FEATURES"]
|
||||||
ibi_second_order_features = extract_second_order_features(ibi_intraday_features, so_features_names)
|
ibi_second_order_features = extract_second_order_features(ibi_intraday_features, so_features_names)
|
||||||
|
|
||||||
return ibi_intraday_features, ibi_second_order_features
|
return ibi_intraday_features, ibi_second_order_features
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -21,12 +21,24 @@ else:
|
||||||
time_segments_file = snakemake.input["time_segments_labels"]
|
time_segments_file = snakemake.input["time_segments_labels"]
|
||||||
|
|
||||||
if calc_windows:
|
if calc_windows:
|
||||||
window_features, second_order_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=True)
|
window_features, second_order_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=True)
|
||||||
|
|
||||||
|
# # Get basic stats from all participants' windows
|
||||||
|
# fo_means_stds = pd.DataFrame({"mean": window_features.mean(), "median": window_features.median(), "sd": window_features.std(),
|
||||||
|
# "min": window_features.min(), "max": window_features.max()})
|
||||||
|
|
||||||
|
# fo_columns = window_features.columns.values[5:]
|
||||||
|
# fo_columns_z_score = [col + "_zscore" for col in fo_columns]
|
||||||
|
# window_features[fo_columns_z_score] = StandardScaler().fit_transform(window_features[fo_columns])
|
||||||
|
|
||||||
|
# print(fo_means_stds)
|
||||||
|
|
||||||
# Z-score SO features by columns
|
# Z-score SO features by columns
|
||||||
if provider["WINDOWS"].get("STANDARDIZE_SO_FEATURES", False):
|
# if provider["WINDOWS"].get("STANDARDIZE_SO_FEATURES", False):
|
||||||
second_order_features[second_order_features.columns[4:]] = StandardScaler().fit_transform(second_order_features[second_order_features.columns[4:]])
|
# for indx, fo_mean_std in fo_means_stds.iterrows():
|
||||||
|
# print(indx, fo_mean_std)
|
||||||
|
|
||||||
|
# sys.exit()
|
||||||
window_features.to_csv(snakemake.output[1], index=False)
|
window_features.to_csv(snakemake.output[1], index=False)
|
||||||
second_order_features.to_csv(snakemake.output[0], index=False)
|
second_order_features.to_csv(snakemake.output[0], index=False)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue