# rapids/src/features/empatica_data_yield.py
#
# 33 lines
# 1.6 KiB
# Python

import pandas as pd
import numpy as np
from datetime import datetime
import sys, yaml
def calculate_empatica_data_yield(features, config=None):  # TODO
    """Add an overall Empatica data yield column to ``features``.

    For every Empatica sensor the yield is the number of computed windows
    times the configured window length, divided by the duration of the time
    segment, capped at 1. The overall yield is the row-wise mean of the
    per-sensor yields (NaN values are skipped; a row with no valid value
    gets 0).

    Args:
        features: DataFrame with the columns ``local_segment_start_datetime``
            and ``local_segment_end_datetime`` (parsed with the format
            ``%Y-%m-%d %H:%M:%S``) and, optionally, one
            ``<sensor>_cr_SO_windowsCount`` column per Empatica sensor.
            A sensor whose count column is missing contributes a yield of 0.
        config: Optional, already parsed configuration mapping. Window
            lengths are read from
            ``config[SENSOR]["PROVIDERS"]["CR"]["WINDOWS"]["WINDOW_LENGTH"]``
            (presumably expressed in seconds, since they are divided by the
            segment duration in seconds -- confirm against the config).
            When omitted, ``config.yaml`` is read from the current working
            directory.

    Returns:
        The same ``features`` object, modified in place, with an
        ``empatica_data_yield`` column added (or overwritten).
    """
    sensors = (
        "EMPATICA_ACCELEROMETER",
        "EMPATICA_TEMPERATURE",
        "EMPATICA_ELECTRODERMAL_ACTIVITY",
        "EMPATICA_INTER_BEAT_INTERVAL",
    )

    # Get time segment duration in seconds from all segments in features dataframe
    datetime_start = pd.to_datetime(features['local_segment_start_datetime'], format='%Y-%m-%d %H:%M:%S')
    datetime_end = pd.to_datetime(features['local_segment_end_datetime'], format='%Y-%m-%d %H:%M:%S')
    tseg_duration = (datetime_end - datetime_start).dt.total_seconds()

    if config is None:
        # NOTE(review): yaml.load with FullLoader is kept as in the original;
        # consider yaml.safe_load if the config only contains plain YAML.
        with open('config.yaml', 'r') as stream:
            config = yaml.load(stream, Loader=yaml.FullLoader)

    # Per-sensor yields live in a scratch frame so that no temporary columns
    # have to be added to (and later dropped from) the caller's dataframe.
    sensor_yields = pd.DataFrame(index=features.index)
    for sensor in sensors:
        prefix = sensor.lower()
        count_col = f"{prefix}_cr_SO_windowsCount"
        yield_col = f"{prefix}_data_yield"
        if count_col in features:
            window_length = config[sensor]["PROVIDERS"]["CR"]["WINDOWS"]["WINDOW_LENGTH"]
            sensor_yields[yield_col] = (features[count_col] * window_length) / tseg_duration
        else:
            sensor_yields[yield_col] = 0

    # Assigns 1 to values that are over 1 (in case of windows not being filled fully).
    # clip() leaves NaN untouched, so missing values are still skipped by mean().
    sensor_yields = sensor_yields.clip(upper=1)

    features["empatica_data_yield"] = sensor_yields.mean(axis=1).fillna(0)
    return features