from datetime import datetime
import sys

import numpy as np
import pandas as pd
import yaml


def calculate_empatica_data_yield(features: pd.DataFrame) -> pd.DataFrame:
    """Add an overall ``empatica_data_yield`` column to *features*.

    For every Empatica sensor, a per-sensor yield is computed as
    ``<sensor>_cr_SO_windowsCount * WINDOW_LENGTH / segment_duration``, where
    ``WINDOW_LENGTH`` is read from ``config.yaml`` (in the current working
    directory) under ``<SENSOR>.PROVIDERS.CR.WINDOWS`` and the segment
    duration is the number of seconds between ``local_segment_start_datetime``
    and ``local_segment_end_datetime``. A sensor whose window-count column is
    absent contributes a yield of 0. Per-sensor yields are capped at 1 and
    then averaged row-wise; rows where no yield is available get 0.

    Args:
        features: Frame holding the two segment datetime columns (formatted
            as ``%Y-%m-%d %H:%M:%S``) and, optionally, the per-sensor
            ``*_cr_SO_windowsCount`` columns. It is modified in place.

    Returns:
        The same ``features`` object, with ``empatica_data_yield`` added.
        The temporary per-sensor ``*_data_yield`` columns are removed again.
    """
    datetime_format = "%Y-%m-%d %H:%M:%S"

    # Duration in seconds of the time segment each row belongs to.
    datetime_start = pd.to_datetime(
        features["local_segment_start_datetime"], format=datetime_format
    )
    datetime_end = pd.to_datetime(
        features["local_segment_end_datetime"], format=datetime_format
    )
    tseg_duration = (datetime_end - datetime_start).dt.total_seconds()

    # NOTE(review): FullLoader can construct more than plain YAML types;
    # prefer yaml.safe_load if config.yaml could ever come from an untrusted
    # source.
    with open("config.yaml", "r") as stream:
        config = yaml.load(stream, Loader=yaml.FullLoader)

    sensors = [
        "EMPATICA_ACCELEROMETER",
        "EMPATICA_TEMPERATURE",
        "EMPATICA_ELECTRODERMAL_ACTIVITY",
        "EMPATICA_INTER_BEAT_INTERVAL",
    ]

    empatica_data_yield_cols = []
    for sensor in sensors:
        prefix = sensor.lower()
        windows_count_col = f"{prefix}_cr_SO_windowsCount"
        data_yield_col = f"{prefix}_data_yield"
        empatica_data_yield_cols.append(data_yield_col)

        if windows_count_col in features:
            # presumably WINDOW_LENGTH is expressed in seconds, matching
            # tseg_duration -- confirm against config.yaml.
            window_length = config[sensor]["PROVIDERS"]["CR"]["WINDOWS"]["WINDOW_LENGTH"]
            features[data_yield_col] = (
                features[windows_count_col] * window_length
            ) / tseg_duration
        else:
            # No window counts for this sensor: it counts as zero yield.
            features[data_yield_col] = 0

    # NOTE(review): this changes a process-wide pandas display option and
    # looks like a debugging leftover; it is kept so callers observe the same
    # side effects as before.
    pd.set_option("display.max_rows", None)

    # Cap yields at 1 (windows are not always filled completely, so the
    # product above can exceed the segment duration). NaN values are kept.
    features[empatica_data_yield_cols] = features[empatica_data_yield_cols].clip(upper=1)

    features["empatica_data_yield"] = (
        features[empatica_data_yield_cols].mean(axis=1, numeric_only=True).fillna(0)
    )

    # The per-sensor columns are dropped for now; keep them if more advanced
    # aggregations (e.g. a weighted average) are needed later.
    features.drop(empatica_data_yield_cols, axis=1, inplace=True)

    return features