Set E4 data yield to 1 if it is over 1. Optimize E4 data_yield script.

imputation_and_cleaning
Primoz 2022-10-27 14:11:42 +00:00
parent 5d17c92e54
commit 6b487fcf7b
1 changed files with 16 additions and 18 deletions

View File

@ -2,33 +2,31 @@ import pandas as pd
import numpy as np
from datetime import datetime
import sys
import sys, yaml
def calculate_empatica_data_yield(features, config=None):  # TODO (unspecified marker carried over from the original)
    """Add an overall ``empatica_data_yield`` column to *features*.

    For every Empatica sensor the per-segment yield is computed as
    ``windowsCount * WINDOW_LENGTH / segment_duration_in_seconds`` and capped
    at 1.  The overall yield is the row-wise mean of the four sensor yields;
    rows whose mean is NaN get 0.

    Args:
        features: DataFrame with ``local_segment_start_datetime`` and
            ``local_segment_end_datetime`` columns formatted as
            ``%Y-%m-%d %H:%M:%S`` and, optionally, the
            ``<sensor>_cr_SO_windowsCount`` columns.  It is modified in place.
        config: Optional already-parsed configuration mapping.  When omitted,
            ``config.yaml`` is read from the current working directory, as the
            original code did.  Must provide
            ``config[SENSOR]["PROVIDERS"]["CR"]["WINDOWS"]["WINDOW_LENGTH"]``
            for every sensor whose windows-count column is present
            (presumably a length in seconds -- confirm against config.yaml).

    Returns:
        The same *features* object with the ``empatica_data_yield`` column
        added; the intermediate per-sensor yield columns are dropped.
    """
    # Duration of every time segment (one value per row), in seconds.
    datetime_format = '%Y-%m-%d %H:%M:%S'
    segment_start = pd.to_datetime(features['local_segment_start_datetime'], format=datetime_format)
    segment_end = pd.to_datetime(features['local_segment_end_datetime'], format=datetime_format)
    tseg_duration = (segment_end - segment_start).dt.total_seconds()

    if config is None:
        # NOTE: FullLoader is kept from the original; config.yaml is assumed to
        # be a trusted project file -- do not point this at untrusted input.
        with open('config.yaml', 'r') as stream:
            config = yaml.load(stream, Loader=yaml.FullLoader)

    sensors = [
        "EMPATICA_ACCELEROMETER",
        "EMPATICA_TEMPERATURE",
        "EMPATICA_ELECTRODERMAL_ACTIVITY",
        "EMPATICA_INTER_BEAT_INTERVAL",
    ]

    empatica_data_yield_cols = []
    for sensor in sensors:
        prefix = sensor.lower()
        count_col = f"{prefix}_cr_SO_windowsCount"
        yield_col = f"{prefix}_data_yield"
        if count_col in features:
            # Config is only consulted for sensors that are actually present.
            window_length = config[sensor]["PROVIDERS"]["CR"]["WINDOWS"]["WINDOW_LENGTH"]
            features[yield_col] = (features[count_col] * window_length) / tseg_duration
        else:
            # A missing sensor counts as zero yield in the overall mean.
            features[yield_col] = 0
        empatica_data_yield_cols.append(yield_col)

    # Cap yields at 1 (windows that are not completely filled can push the
    # ratio above 1); clip() leaves NaN values untouched.
    features[empatica_data_yield_cols] = features[empatica_data_yield_cols].clip(upper=1)

    features["empatica_data_yield"] = features[empatica_data_yield_cols].mean(axis=1).fillna(0)
    # Drop the per-sensor columns; keep them if more advanced aggregation
    # (e.g., a weighted average) is needed later.
    features.drop(empatica_data_yield_cols, axis=1, inplace=True)

    return features