From 9cf9e1fe14e8926fdd19dbbeefc87443865c33b4 Mon Sep 17 00:00:00 2001 From: Primoz Date: Tue, 10 May 2022 11:36:49 +0000 Subject: [PATCH] Testing and modifying the code with different E4 data. --- config.yaml | 6 +++--- data/external/participant_files/p01.yaml | 10 +++++----- src/features/cr_features_helper_methods.py | 8 ++++++-- src/features/empatica_accelerometer/cr/main.py | 4 ++-- src/features/empatica_blood_volume_pulse/cr/main.py | 12 ++---------- .../empatica_electrodermal_activity/cr/main.py | 4 ++-- src/features/empatica_inter_beat_interval/cr/main.py | 5 ++--- src/features/empatica_temperature/cr/main.py | 12 ++---------- src/features/utils/utils.py | 4 ++-- 9 files changed, 26 insertions(+), 39 deletions(-) diff --git a/config.yaml b/config.yaml index ea6aa2be..97785709 100644 --- a/config.yaml +++ b/config.yaml @@ -3,7 +3,7 @@ ######################################################################################################################## # See https://www.rapids.science/latest/setup/configuration/#participant-files -PIDS: [p01] +PIDS: [p02, p01] # See https://www.rapids.science/latest/setup/configuration/#automatic-creation-of-participant-files CREATE_PARTICIPANT_FILES: @@ -477,11 +477,11 @@ EMPATICA_ACCELEROMETER: CONTAINER: ACC PROVIDERS: DBDP: - COMPUTE: True + COMPUTE: False FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"] SRC_SCRIPT: src/features/empatica_accelerometer/dbdp/main.py CR: - COMPUTE: True + COMPUTE: False FEATURES: ["fqHighestPeakFreqs", "fqHighestPeaks", "fqEnergyFeat", "fqEntropyFeat", "fqHistogramBins","fqAbsMean", "fqSkewness", "fqKurtosis", "fqInterquart", # Freq features "meanLow", "areaLow", "totalAbsoluteAreaBand", "totalMagnitudeBand", "entropyBand", "skewnessBand", "kurtosisBand", "postureDistanceLow", "absoluteMeanBand", "absoluteAreaBand", "quartilesBand", "interQuartileRangeBand", "varianceBand", diff --git a/data/external/participant_files/p01.yaml b/data/external/participant_files/p01.yaml index 90f48512..fe394a76 100644 --- a/data/external/participant_files/p01.yaml +++ b/data/external/participant_files/p01.yaml @@ -1,9 +1,9 @@ PHONE: - DEVICE_IDS: [a748ee1a-1d0b-4ae9-9074-279a2b6ba524] # the participant's AWARE device id - PLATFORMS: [android] # or ios - LABEL: MyTestP01 # any string - START_DATE: 2020-01-01 # this can also be empty - END_DATE: 2021-01-01 # this can also be empty + DEVICE_IDS: [a748ee1a-1d0b-4ae9-9074-279a2b6ba524] # the participant's AWARE device id + PLATFORMS: [android] # or ios + LABEL: MyTestP01 # any string + START_DATE: 2020-01-01 # this can also be empty + END_DATE: 2021-01-01 # this can also be empty EMPATICA: DEVICE_IDS: [empatica1] LABEL: test01 diff --git a/src/features/cr_features_helper_methods.py b/src/features/cr_features_helper_methods.py index b963b46e..f8dd3456 100644 --- a/src/features/cr_features_helper_methods.py +++ b/src/features/cr_features_helper_methods.py @@ -1,4 +1,7 @@ import pandas as pd +import math as m + +import sys def extract_second_order_features(intraday_features, so_features_names): if not intraday_features.empty: @@ -21,10 +24,11 @@ def extract_second_order_features(intraday_features, so_features_names): return so_features -def get_sample_rate(data): +def get_sample_rate(data): # To-Do get the sample rate information from the file's metadata try: timestamps_diff = data['timestamp'].diff().dropna().mean() + print("Timestamp diff:", timestamps_diff) except: raise Exception("Error occured while trying to get the mean sample rate from the data.") - return int(1000/timestamps_diff) \ No newline at end of file + return m.ceil(1000/timestamps_diff) \ No newline at end of file diff --git a/src/features/empatica_accelerometer/cr/main.py b/src/features/empatica_accelerometer/cr/main.py index ce2fc2f9..9a11a752 100644 --- a/src/features/empatica_accelerometer/cr/main.py +++ b/src/features/empatica_accelerometer/cr/main.py @@ -3,7 +3,7 @@ from scipy.stats import entropy from cr_features.helper_functions import convert_to2d, accelerometer_features, frequency_features from cr_features.calculate_features_old import calculateFeatures -from cr_features_helper_methods import get_sample_rate, extract_second_order_features +from cr_features_helper_methods import extract_second_order_features import sys @@ -11,7 +11,7 @@ def extract_acc_features_from_intraday_data(acc_intraday_data, features, window_ acc_intraday_features = pd.DataFrame(columns=["local_segment"] + features) if not acc_intraday_data.empty: - sample_rate = get_sample_rate(acc_intraday_data) + sample_rate = 32 acc_intraday_data = filter_data_by_segment(acc_intraday_data, time_segment) diff --git a/src/features/empatica_blood_volume_pulse/cr/main.py b/src/features/empatica_blood_volume_pulse/cr/main.py index 44357e7f..7e991159 100644 --- a/src/features/empatica_blood_volume_pulse/cr/main.py +++ b/src/features/empatica_blood_volume_pulse/cr/main.py @@ -3,24 +3,16 @@ from scipy.stats import entropy from cr_features.helper_functions import convert_to2d, hrv_features, hrv_freq_features from cr_features.hrv import extract_hrv_features_2d_wrapper -from cr_features_helper_methods import get_sample_rate, extract_second_order_features +from cr_features_helper_methods import extract_second_order_features import sys -def get_sample_rate(data): - try: - timestamps_diff = data['timestamp'].diff().dropna().mean() - except: - raise Exception("Error occured while trying to get the mean sample rate from the data.") - - return int(1000/timestamps_diff) - def extract_bvp_features_from_intraday_data(bvp_intraday_data, features, window_length, time_segment, filter_data_by_segment): bvp_intraday_features = pd.DataFrame(columns=["local_segment"] + features) if not bvp_intraday_data.empty: - sample_rate = get_sample_rate(bvp_intraday_data) + sample_rate = 64 bvp_intraday_data = filter_data_by_segment(bvp_intraday_data, time_segment) diff --git a/src/features/empatica_electrodermal_activity/cr/main.py b/src/features/empatica_electrodermal_activity/cr/main.py index 6630027a..dee4869f 100644 --- a/src/features/empatica_electrodermal_activity/cr/main.py +++ b/src/features/empatica_electrodermal_activity/cr/main.py @@ -3,14 +3,14 @@ from scipy.stats import entropy from cr_features.helper_functions import convert_to2d, gsr_features from cr_features.calculate_features import calculate_features -from cr_features_helper_methods import get_sample_rate, extract_second_order_features +from cr_features_helper_methods import extract_second_order_features def extract_eda_features_from_intraday_data(eda_intraday_data, features, window_length, time_segment, filter_data_by_segment): eda_intraday_features = pd.DataFrame(columns=["local_segment"] + features) if not eda_intraday_data.empty: - sample_rate = get_sample_rate(eda_intraday_data) + sample_rate = 4 eda_intraday_data = filter_data_by_segment(eda_intraday_data, time_segment) diff --git a/src/features/empatica_inter_beat_interval/cr/main.py b/src/features/empatica_inter_beat_interval/cr/main.py index 02c0cd94..4d8c4fd1 100644 --- a/src/features/empatica_inter_beat_interval/cr/main.py +++ b/src/features/empatica_inter_beat_interval/cr/main.py @@ -3,7 +3,7 @@ import numpy as np from cr_features.helper_functions import convert_ibi_to2d_time, hrv_features, hrv_freq_features from cr_features.hrv import extract_hrv_features_2d_wrapper -from cr_features_helper_methods import get_sample_rate, extract_second_order_features +from cr_features_helper_methods import extract_second_order_features import math import sys @@ -16,8 +16,7 @@ def extract_ibi_features_from_intraday_data(ibi_intraday_data, features, window_ ibi_intraday_features = pd.DataFrame(columns=["local_segment"] + features) if not ibi_intraday_data.empty: - sample_rate = get_sample_rate(ibi_intraday_data) - + ibi_intraday_data = filter_data_by_segment(ibi_intraday_data, time_segment) if not ibi_intraday_data.empty: diff --git a/src/features/empatica_temperature/cr/main.py b/src/features/empatica_temperature/cr/main.py index a71b0a69..1eea539a 100644 --- a/src/features/empatica_temperature/cr/main.py +++ b/src/features/empatica_temperature/cr/main.py @@ -3,23 +3,15 @@ from scipy.stats import entropy from cr_features.helper_functions import convert_to2d, generic_features from cr_features.calculate_features_old import calculateFeatures -from cr_features_helper_methods import get_sample_rate, extract_second_order_features +from cr_features_helper_methods import extract_second_order_features import sys -def get_sample_rate(data): - try: - timestamps_diff = data['timestamp'].diff().dropna().mean() - except: - raise Exception("Error occured while trying to get the mean sample rate from the data.") - - return int(1000/timestamps_diff) - def extract_temp_features_from_intraday_data(temperature_intraday_data, features, window_length, time_segment, filter_data_by_segment): temperature_intraday_features = pd.DataFrame(columns=["local_segment"] + features) if not temperature_intraday_data.empty: - sample_rate = get_sample_rate(temperature_intraday_data) + sample_rate = 4 temperature_intraday_data = filter_data_by_segment(temperature_intraday_data, time_segment) diff --git a/src/features/utils/utils.py b/src/features/utils/utils.py index 832620d6..90608ab1 100644 --- a/src/features/utils/utils.py +++ b/src/features/utils/utils.py @@ -118,9 +118,9 @@ def fetch_provider_features(provider, provider_key, sensor_key, sensor_data_file features[0].columns = ["{}{}".format("" if col.startswith("local_segment") else (sensor_key + "_"+ provider_key + "_"), col) for col in features[0].columns] features[1].columns = ["{}{}".format("" if col.startswith("local_segment") else (sensor_key + "_"+ provider_key + "_"), col) for col in features[1].columns] if not features[0].empty: - sensor_fo_features = pd.concat([sensor_features, features[0]], axis=0, sort=False) + sensor_fo_features = pd.concat([sensor_fo_features, features[0]], axis=0, sort=False) if not features[1].empty: - sensor_so_features = pd.concat([sensor_features, features[1]], axis=0, sort=False) + sensor_so_features = pd.concat([sensor_so_features, features[1]], axis=0, sort=False) else: if not "local_segment" in features.columns: raise ValueError("The dataframe returned by the " + sensor_key + " provider '" + provider_key + "' is missing the 'local_segment' column added by the 'filter_data_by_segment()' function. Check the provider script is using such function and is not removing 'local_segment' by accident (" + provider["SRC_SCRIPT"] + ")\n The 'local_segment' column is used to index a provider's features (each row corresponds to a different time segment instance (e.g. 2020-01-01, 2020-01-02, 2020-01-03, etc.)")