Implementation of the second order features.
parent
66451160e9
commit
5638367999
19
config.yaml
19
config.yaml
|
@ -493,6 +493,7 @@ EMPATICA_ACCELEROMETER:
|
|||
WINDOWS:
|
||||
COMPUTE: True
|
||||
WINDOW_LENGTH: 10 # specify window length in seconds
|
||||
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'max', 'min']
|
||||
SRC_SCRIPT: src/features/empatica_accelerometer/cr/main.py
|
||||
|
||||
|
||||
|
@ -510,11 +511,11 @@ EMPATICA_TEMPERATURE:
|
|||
CONTAINER: TEMP
|
||||
PROVIDERS:
|
||||
DBDP:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"]
|
||||
SRC_SCRIPT: src/features/empatica_temperature/dbdp/main.py
|
||||
CR:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
FEATURES: ["autocorrelations", "countAboveMean", "countBelowMean", "maximum", "minimum", "meanAbsChange", "longestStrikeAboveMean",
|
||||
"longestStrikeBelowMean", "stdDev", "median", "meanChange", "numberOfZeroCrossings", "absEnergy", "linearTrendSlope",
|
||||
"ratioBeyondRSigma", "binnedEntropy", "numOfPeaksAutocorr", "numberOfZeroCrossingsAutocorr", "areaAutocorr",
|
||||
|
@ -523,6 +524,7 @@ EMPATICA_TEMPERATURE:
|
|||
WINDOWS:
|
||||
COMPUTE: True
|
||||
WINDOW_LENGTH: 90 # specify window length in seconds
|
||||
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'max', 'min']
|
||||
SRC_SCRIPT: src/features/empatica_temperature/cr/main.py
|
||||
|
||||
# See https://www.rapids.science/latest/features/empatica-electrodermal-activity/
|
||||
|
@ -530,11 +532,11 @@ EMPATICA_ELECTRODERMAL_ACTIVITY:
|
|||
CONTAINER: EDA
|
||||
PROVIDERS:
|
||||
DBDP:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
FEATURES: ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", "diffminmodeeda", "entropyeda"]
|
||||
SRC_SCRIPT: src/features/empatica_electrodermal_activity/dbdp/main.py
|
||||
CR:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
FEATURES: ['mean', 'std', 'q25', 'q75', 'qd', 'deriv', 'power', 'numPeaks', 'ratePeaks', 'powerPeaks', 'sumPosDeriv', 'propPosDeriv', 'derivTonic',
|
||||
'sigTonicDifference', 'freqFeats','maxPeakAmplitudeChangeBefore', 'maxPeakAmplitudeChangeAfter', 'avgPeakAmplitudeChangeBefore',
|
||||
'avgPeakAmplitudeChangeAfter', 'avgPeakChangeRatio', 'maxPeakIncreaseTime', 'maxPeakDecreaseTime', 'maxPeakDuration', 'maxPeakChangeRatio',
|
||||
|
@ -543,6 +545,7 @@ EMPATICA_ELECTRODERMAL_ACTIVITY:
|
|||
WINDOWS:
|
||||
COMPUTE: True
|
||||
WINDOW_LENGTH: 80 # specify window length in seconds
|
||||
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'max', 'min']
|
||||
SRC_SCRIPT: src/features/empatica_electrodermal_activity/cr/main.py
|
||||
|
||||
# See https://www.rapids.science/latest/features/empatica-blood-volume-pulse/
|
||||
|
@ -550,16 +553,17 @@ EMPATICA_BLOOD_VOLUME_PULSE:
|
|||
CONTAINER: BVP
|
||||
PROVIDERS:
|
||||
DBDP:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
FEATURES: ["maxbvp", "minbvp", "avgbvp", "medianbvp", "modebvp", "stdbvp", "diffmaxmodebvp", "diffminmodebvp", "entropybvp"]
|
||||
SRC_SCRIPT: src/features/empatica_blood_volume_pulse/dbdp/main.py
|
||||
CR:
|
||||
COMPUTE: False
|
||||
COMPUTE: True
|
||||
FEATURES: ['meanHr', 'ibi', 'sdnn', 'sdsd', 'rmssd', 'pnn20', 'pnn50', 'sd', 'sd2', 'sd1/sd2', 'numRR', # Time features
|
||||
'VLF', 'LF', 'LFnorm', 'HF', 'HFnorm', 'LF/HF', 'fullIntegral'] # Freq features
|
||||
WINDOWS:
|
||||
COMPUTE: True
|
||||
WINDOW_LENGTH: 10 # specify window length in seconds
|
||||
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'max', 'min']
|
||||
SRC_SCRIPT: src/features/empatica_blood_volume_pulse/cr/main.py
|
||||
|
||||
# See https://www.rapids.science/latest/features/empatica-inter-beat-interval/
|
||||
|
@ -576,7 +580,8 @@ EMPATICA_INTER_BEAT_INTERVAL:
|
|||
'VLF', 'LF', 'LFnorm', 'HF', 'HFnorm', 'LF/HF', 'fullIntegral'] # Freq features
|
||||
WINDOWS:
|
||||
COMPUTE: True
|
||||
WINDOW_LENGTH: 120 # specify window length in seconds
|
||||
WINDOW_LENGTH: 2000 # specify window length in seconds
|
||||
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'max', 'min']
|
||||
SRC_SCRIPT: src/features/empatica_inter_beat_interval/cr/main.py
|
||||
|
||||
# See https://www.rapids.science/latest/features/empatica-tags/
|
||||
|
|
|
@ -110,12 +110,13 @@ dependencies:
|
|||
- bidict==0.22.0
|
||||
- biosppy==0.8.0
|
||||
- cached-property==1.5.2
|
||||
- calculatingfeatures==0.1.1
|
||||
- configargparse==0.15.1
|
||||
- cr-features==0.1.8
|
||||
- cycler==0.11.0
|
||||
- decorator==4.4.2
|
||||
- fonttools==4.31.2
|
||||
- fonttools==4.33.2
|
||||
- h5py==3.6.0
|
||||
- hmmlearn==0.2.7
|
||||
- ipython-genutils==0.2.0
|
||||
- jupyter-core==4.6.3
|
||||
- kiwisolver==1.4.2
|
||||
|
@ -124,7 +125,7 @@ dependencies:
|
|||
- opencv-python==4.5.5.64
|
||||
- packaging==21.3
|
||||
- peakutils==1.3.3
|
||||
- pillow==9.0.1
|
||||
- pillow==9.1.0
|
||||
- pulp==2.4
|
||||
- pyparsing==2.4.7
|
||||
- pyrsistent==0.15.5
|
||||
|
@ -134,5 +135,5 @@ dependencies:
|
|||
- snakemake==5.30.2
|
||||
- toposort==1.5
|
||||
- traitlets==4.3.3
|
||||
- typing-extensions==4.1.1
|
||||
- typing-extensions==4.2.0
|
||||
prefix: /opt/conda/envs/rapids
|
||||
|
|
|
@ -771,7 +771,8 @@ rule empatica_accelerometer_python_features:
|
|||
sensor_key = "empatica_accelerometer"
|
||||
output:
|
||||
"data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_python_{provider_key}.csv",
|
||||
"data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_python_{provider_key}_windows.csv"
|
||||
"data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_python_{provider_key}_windows.csv",
|
||||
"data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_python_{provider_key}_windows_SO_features.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
|
@ -798,7 +799,8 @@ rule empatica_heartrate_python_features:
|
|||
sensor_key = "empatica_heartrate"
|
||||
output:
|
||||
"data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_python_{provider_key}.csv",
|
||||
"data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_python_{provider_key}_windows.csv"
|
||||
"data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_python_{provider_key}_windows.csv",
|
||||
"data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_python_{provider_key}_windows_SO_features.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
|
@ -825,7 +827,8 @@ rule empatica_temperature_python_features:
|
|||
sensor_key = "empatica_temperature"
|
||||
output:
|
||||
"data/interim/{pid}/empatica_temperature_features/empatica_temperature_python_{provider_key}.csv",
|
||||
"data/interim/{pid}/empatica_temperature_features/empatica_temperature_python_{provider_key}_windows.csv"
|
||||
"data/interim/{pid}/empatica_temperature_features/empatica_temperature_python_{provider_key}_windows.csv",
|
||||
"data/interim/{pid}/empatica_temperature_features/empatica_temperature_python_{provider_key}_windows_SO_features.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
|
@ -852,7 +855,8 @@ rule empatica_electrodermal_activity_python_features:
|
|||
sensor_key = "empatica_electrodermal_activity"
|
||||
output:
|
||||
"data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_{provider_key}.csv",
|
||||
"data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_{provider_key}_windows.csv"
|
||||
"data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_{provider_key}_windows.csv",
|
||||
"data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_{provider_key}_windows_SO_features.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
|
@ -879,7 +883,8 @@ rule empatica_blood_volume_pulse_python_features:
|
|||
sensor_key = "empatica_blood_volume_pulse"
|
||||
output:
|
||||
"data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}.csv",
|
||||
"data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}_windows.csv"
|
||||
"data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}_windows.csv",
|
||||
"data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}_windows_SO_features.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
|
@ -906,7 +911,8 @@ rule empatica_inter_beat_interval_python_features:
|
|||
sensor_key = "empatica_inter_beat_interval"
|
||||
output:
|
||||
"data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_{provider_key}.csv",
|
||||
"data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_{provider_key}_windows.csv"
|
||||
"data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_{provider_key}_windows.csv",
|
||||
"data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_{provider_key}_windows_SO_features.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
import pandas as pd
|
||||
|
||||
def extract_second_order_features(intraday_features, so_features_names):
    """Aggregate first-order (per-window) features into second-order features.

    Groups the per-window feature rows by ``local_segment`` and applies each
    aggregation requested in ``so_features_names`` ("mean", "median", "sd",
    "max", "min"), suffixing the resulting columns with ``_SO_<agg>``.

    Parameters
    ----------
    intraday_features : pd.DataFrame
        First-order features; must contain a ``local_segment`` column and a
        ``level_1`` helper column (dropped before aggregation).
    so_features_names : iterable of str
        Which second-order aggregations to compute.

    Returns
    -------
    pd.DataFrame
        One row per ``local_segment`` with the requested aggregate columns,
        or an empty frame with only a ``local_segment`` column when the
        input is empty.
    """
    if not intraday_features.empty:
        so_features = pd.DataFrame()
        # Hoist the groupby: the same grouping is reused by every aggregation.
        grouped = intraday_features.drop("level_1", axis=1).groupby(["local_segment"])
        if "mean" in so_features_names:
            so_features = pd.concat([so_features, grouped.mean().add_suffix("_SO_mean")], axis=1)
        if "median" in so_features_names:
            # BUG FIX: the original called .mean() here, so the "_SO_median"
            # columns silently duplicated the mean instead of the median.
            so_features = pd.concat([so_features, grouped.median().add_suffix("_SO_median")], axis=1)
        if "sd" in so_features_names:
            so_features = pd.concat([so_features, grouped.std().add_suffix("_SO_sd")], axis=1)
        if "max" in so_features_names:
            so_features = pd.concat([so_features, grouped.max().add_suffix("_SO_max")], axis=1)
        if "min" in so_features_names:
            so_features = pd.concat([so_features, grouped.min().add_suffix("_SO_min")], axis=1)

        # Move local_segment back out of the index so callers see it as a column.
        so_features.reset_index(inplace=True)

    else:
        so_features = pd.DataFrame(columns=["local_segment"])

    return so_features
|
||||
|
||||
def get_sample_rate(data):
    """Estimate the sample rate in Hz from millisecond timestamps.

    Computes the mean difference between consecutive values of the
    ``timestamp`` column (assumed milliseconds — confirm against caller) and
    converts it to an integer rate via ``1000 / mean_diff``.

    Parameters
    ----------
    data : pd.DataFrame
        Must contain a numeric ``timestamp`` column.

    Returns
    -------
    int
        Estimated sample rate in Hz (truncated toward zero).

    Raises
    ------
    Exception
        If the ``timestamp`` column is missing or non-numeric.
    """
    try:
        timestamps_diff = data['timestamp'].diff().dropna().mean()
    except (KeyError, TypeError, AttributeError) as err:
        # Narrowed from a bare ``except`` (which also swallowed
        # KeyboardInterrupt/SystemExit) and chained so the root cause
        # survives in the traceback.
        raise Exception("Error occured while trying to get the mean sample rate from the data.") from err

    return int(1000/timestamps_diff)
|
|
@ -3,6 +3,7 @@ from scipy.stats import entropy
|
|||
|
||||
from cr_features.helper_functions import convert_to2d, accelerometer_features, frequency_features
|
||||
from cr_features.calculate_features import calculate_features
|
||||
from cr_features_helper_methods import get_sample_rate, extract_second_order_features
|
||||
|
||||
import sys
|
||||
|
||||
|
@ -69,4 +70,9 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen
|
|||
acc_intraday_features = extract_acc_features_from_intraday_data(acc_intraday_data, intraday_features_to_compute,
|
||||
requested_window_length, time_segment, filter_data_by_segment)
|
||||
|
||||
if calc_windows:
|
||||
so_features_names = provider["WINDOWS"]["SECOND_ORDER_FEATURES"]
|
||||
acc_second_order_features = extract_second_order_features(acc_intraday_features, so_features_names)
|
||||
return acc_intraday_features, acc_second_order_features
|
||||
|
||||
return acc_intraday_features
|
|
@ -3,6 +3,7 @@ from scipy.stats import entropy
|
|||
|
||||
from cr_features.helper_functions import convert_to2d, hrv_features, hrv_freq_features
|
||||
from cr_features.hrv import extract_hrv_features_2d_wrapper
|
||||
from cr_features_helper_methods import get_sample_rate, extract_second_order_features
|
||||
|
||||
import sys
|
||||
|
||||
|
@ -74,4 +75,9 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen
|
|||
bvp_intraday_features = extract_bvp_features_from_intraday_data(bvp_intraday_data, intraday_features_to_compute,
|
||||
requested_window_length, time_segment, filter_data_by_segment)
|
||||
|
||||
if calc_windows:
|
||||
so_features_names = provider["WINDOWS"]["SECOND_ORDER_FEATURES"]
|
||||
bvp_second_order_features = extract_second_order_features(bvp_intraday_features, so_features_names)
|
||||
return bvp_intraday_features, bvp_second_order_features
|
||||
|
||||
return bvp_intraday_features
|
|
@ -3,6 +3,7 @@ from scipy.stats import entropy
|
|||
|
||||
from cr_features.helper_functions import convert_to2d, gsr_features
|
||||
from cr_features.calculate_features import calculate_features
|
||||
from cr_features_helper_methods import get_sample_rate, extract_second_order_features
|
||||
|
||||
|
||||
def getSampleRate(data):
|
||||
|
@ -62,4 +63,9 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen
|
|||
eda_intraday_features = extract_eda_features_from_intraday_data(eda_intraday_data, intraday_features_to_compute,
|
||||
requested_window_length, time_segment, filter_data_by_segment)
|
||||
|
||||
if calc_windows:
|
||||
so_features_names = provider["WINDOWS"]["SECOND_ORDER_FEATURES"]
|
||||
eda_second_order_features = extract_second_order_features(eda_intraday_features, so_features_names)
|
||||
return eda_intraday_features, eda_second_order_features
|
||||
|
||||
return eda_intraday_features
|
|
@ -3,6 +3,7 @@ import numpy as np
|
|||
|
||||
from cr_features.helper_functions import convert_ibi_to2d_time, hrv_features, hrv_freq_features
|
||||
from cr_features.hrv import extract_hrv_features_2d_wrapper
|
||||
from cr_features_helper_methods import get_sample_rate, extract_second_order_features
|
||||
|
||||
import math
|
||||
import sys
|
||||
|
@ -11,14 +12,6 @@ pd.set_option('display.max_rows', 1000)
|
|||
pd.set_option('display.max_columns', None)
|
||||
|
||||
|
||||
def get_sample_rate(data):
|
||||
try:
|
||||
timestamps_diff = data['timestamp'].diff().dropna().mean()
|
||||
except:
|
||||
raise Exception("Error occured while trying to get the mean sample rate from the data.")
|
||||
|
||||
return int(1000/timestamps_diff)
|
||||
|
||||
def extract_ibi_features_from_intraday_data(ibi_intraday_data, features, window_length, time_segment, filter_data_by_segment):
|
||||
ibi_intraday_features = pd.DataFrame(columns=["local_segment"] + features)
|
||||
|
||||
|
@ -30,13 +23,7 @@ def extract_ibi_features_from_intraday_data(ibi_intraday_data, features, window_
|
|||
if not ibi_intraday_data.empty:
|
||||
|
||||
ibi_intraday_features = pd.DataFrame()
|
||||
# np.set_printoptions(threshold=sys.maxsize)
|
||||
# print(ibi_intraday_data.groupby('local_segment').apply(lambda x: math.ceil(x['timings'].iloc[-1])))
|
||||
# nekaj = ibi_intraday_data.groupby('local_segment').apply(lambda x: \
|
||||
# convert_ibi_to2d_time(x[['timings', 'inter_beat_interval']], window_length)[1])
|
||||
|
||||
# sys.exit()
|
||||
|
||||
|
||||
# apply methods from calculate features module
|
||||
if window_length is None:
|
||||
ibi_intraday_features = \
|
||||
|
@ -82,5 +69,10 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen
|
|||
# extract features from intraday data
|
||||
ibi_intraday_features = extract_ibi_features_from_intraday_data(ibi_intraday_data, intraday_features_to_compute,
|
||||
requested_window_length, time_segment, filter_data_by_segment)
|
||||
if calc_windows:
|
||||
so_features_names = provider["WINDOWS"]["SECOND_ORDER_FEATURES"]
|
||||
ibi_second_order_features = extract_second_order_features(ibi_intraday_features, so_features_names)
|
||||
return ibi_intraday_features, ibi_second_order_features
|
||||
|
||||
|
||||
return ibi_intraday_features
|
|
@ -3,6 +3,7 @@ from scipy.stats import entropy
|
|||
|
||||
from cr_features.helper_functions import convert_to2d, generic_features
|
||||
from cr_features.calculate_features import calculate_features
|
||||
from cr_features_helper_methods import get_sample_rate, extract_second_order_features
|
||||
|
||||
import sys
|
||||
|
||||
|
@ -61,4 +62,10 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen
|
|||
# extract features from intraday data
|
||||
temperature_intraday_features = extract_temp_features_from_intraday_data(temperature_intraday_data, intraday_features_to_compute,
|
||||
requested_window_length, time_segment, filter_data_by_segment)
|
||||
|
||||
if calc_windows:
|
||||
so_features_names = provider["WINDOWS"]["SECOND_ORDER_FEATURES"]
|
||||
temperature_second_order_features = extract_second_order_features(temperature_intraday_features, so_features_names)
|
||||
return temperature_intraday_features, temperature_second_order_features
|
||||
|
||||
return temperature_intraday_features
|
|
@ -22,13 +22,17 @@ else:
|
|||
del sensor_data_files["time_segments_labels"]
|
||||
time_segments_file = snakemake.input["time_segments_labels"]
|
||||
|
||||
sensor_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=calc_windows)
|
||||
# Calculation over multiple windows in case of Empatica's CR-features
|
||||
if calc_windows:
|
||||
sensor_features.to_csv(snakemake.output[1], index=False)
|
||||
sensor_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=False)
|
||||
first_order_features, second_order_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=calc_windows)
|
||||
first_order_features.to_csv(snakemake.output[1], index=False)
|
||||
second_order_features.to_csv(snakemake.output[2], index=False)
|
||||
calc_windows = False
|
||||
elif "empatica" in sensor_key:
|
||||
pd.DataFrame().to_csv(snakemake.output[1], index=False)
|
||||
pd.DataFrame().to_csv(snakemake.output[2], index=False)
|
||||
|
||||
|
||||
sensor_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=calc_windows)
|
||||
|
||||
|
||||
sensor_features.to_csv(snakemake.output[0], index=False)
|
||||
|
|
|
@ -93,6 +93,8 @@ def fetch_provider_features(provider, provider_key, sensor_key, sensor_data_file
|
|||
from importlib import import_module, util
|
||||
|
||||
sensor_features = pd.DataFrame(columns=["local_segment"])
|
||||
sensor_fo_features = pd.DataFrame(columns=["local_segment"])
|
||||
sensor_so_features = pd.DataFrame(columns=["local_segment"])
|
||||
time_segments_labels = pd.read_csv(time_segments_file, header=0)
|
||||
if "FEATURES" not in provider:
|
||||
raise ValueError("Provider config[{}][PROVIDERS][{}] is missing a FEATURES attribute in config.yaml".format(sensor_key.upper(), provider_key.upper()))
|
||||
|
@ -106,23 +108,57 @@ def fetch_provider_features(provider, provider_key, sensor_key, sensor_data_file
|
|||
time_segments_labels["label"] = [""]
|
||||
for time_segment in time_segments_labels["label"]:
|
||||
print("{} Processing {} {} {}".format(rapids_log_tag, sensor_key, provider_key, time_segment))
|
||||
|
||||
features = feature_function(sensor_data_files, time_segment, provider, filter_data_by_segment=filter_data_by_segment, chunk_episodes=chunk_episodes, calc_windows=calc_windows)
|
||||
if not "local_segment" in features.columns:
|
||||
raise ValueError("The dataframe returned by the " + sensor_key + " provider '" + provider_key + "' is missing the 'local_segment' column added by the 'filter_data_by_segment()' function. Check the provider script is using such function and is not removing 'local_segment' by accident (" + provider["SRC_SCRIPT"] + ")\n The 'local_segment' column is used to index a provider's features (each row corresponds to a different time segment instance (e.g. 2020-01-01, 2020-01-02, 2020-01-03, etc.)")
|
||||
features.columns = ["{}{}".format("" if col.startswith("local_segment") else (sensor_key + "_"+ provider_key + "_"), col) for col in features.columns]
|
||||
sensor_features = pd.concat([sensor_features, features], axis=0, sort=False)
|
||||
|
||||
# In case of calc_window = True
|
||||
if isinstance(features, tuple):
|
||||
if not "local_segment" in features[0].columns or not "local_segment" in features[1].columns:
|
||||
raise ValueError("The dataframe returned by the " + sensor_key + " provider '" + provider_key + "' is missing the 'local_segment' column added by the 'filter_data_by_segment()' function. Check the provider script is using such function and is not removing 'local_segment' by accident (" + provider["SRC_SCRIPT"] + ")\n The 'local_segment' column is used to index a provider's features (each row corresponds to a different time segment instance (e.g. 2020-01-01, 2020-01-02, 2020-01-03, etc.)")
|
||||
features[0].columns = ["{}{}".format("" if col.startswith("local_segment") else (sensor_key + "_"+ provider_key + "_"), col) for col in features[0].columns]
|
||||
features[1].columns = ["{}{}".format("" if col.startswith("local_segment") else (sensor_key + "_"+ provider_key + "_"), col) for col in features[1].columns]
|
||||
if not features[0].empty:
|
||||
sensor_fo_features = pd.concat([sensor_features, features[0]], axis=0, sort=False)
|
||||
if not features[1].empty:
|
||||
sensor_so_features = pd.concat([sensor_features, features[1]], axis=0, sort=False)
|
||||
else:
|
||||
if not "local_segment" in features.columns:
|
||||
raise ValueError("The dataframe returned by the " + sensor_key + " provider '" + provider_key + "' is missing the 'local_segment' column added by the 'filter_data_by_segment()' function. Check the provider script is using such function and is not removing 'local_segment' by accident (" + provider["SRC_SCRIPT"] + ")\n The 'local_segment' column is used to index a provider's features (each row corresponds to a different time segment instance (e.g. 2020-01-01, 2020-01-02, 2020-01-03, etc.)")
|
||||
features.columns = ["{}{}".format("" if col.startswith("local_segment") else (sensor_key + "_"+ provider_key + "_"), col) for col in features.columns]
|
||||
sensor_features = pd.concat([sensor_features, features], axis=0, sort=False)
|
||||
else:
|
||||
for feature in provider["FEATURES"]:
|
||||
sensor_features[feature] = None
|
||||
segment_colums = pd.DataFrame()
|
||||
sensor_features['local_segment'] = sensor_features['local_segment'].str.replace(r'_RR\d+SS', '')
|
||||
split_segemnt_columns = sensor_features["local_segment"].str.split(pat="(.*)#(.*),(.*)", expand=True)
|
||||
new_segment_columns = split_segemnt_columns.iloc[:,1:4] if split_segemnt_columns.shape[1] == 5 else pd.DataFrame(columns=["local_segment_label", "local_segment_start_datetime","local_segment_end_datetime"])
|
||||
segment_colums[["local_segment_label", "local_segment_start_datetime", "local_segment_end_datetime"]] = new_segment_columns
|
||||
for i in range(segment_colums.shape[1]):
|
||||
sensor_features.insert(1 + i, segment_colums.columns[i], segment_colums[segment_colums.columns[i]])
|
||||
|
||||
if calc_windows:
|
||||
segment_colums = pd.DataFrame()
|
||||
sensor_fo_features['local_segment'] = sensor_fo_features['local_segment'].str.replace(r'_RR\d+SS', '')
|
||||
split_segemnt_columns = sensor_fo_features["local_segment"].str.split(pat="(.*)#(.*),(.*)", expand=True)
|
||||
new_segment_columns = split_segemnt_columns.iloc[:,1:4] if split_segemnt_columns.shape[1] == 5 else pd.DataFrame(columns=["local_segment_label", "local_segment_start_datetime","local_segment_end_datetime"])
|
||||
segment_colums[["local_segment_label", "local_segment_start_datetime", "local_segment_end_datetime"]] = new_segment_columns
|
||||
for i in range(segment_colums.shape[1]):
|
||||
sensor_fo_features.insert(1 + i, segment_colums.columns[i], segment_colums[segment_colums.columns[i]])
|
||||
|
||||
segment_colums = pd.DataFrame()
|
||||
sensor_so_features['local_segment'] = sensor_so_features['local_segment'].str.replace(r'_RR\d+SS', '')
|
||||
split_segemnt_columns = sensor_so_features["local_segment"].str.split(pat="(.*)#(.*),(.*)", expand=True)
|
||||
new_segment_columns = split_segemnt_columns.iloc[:,1:4] if split_segemnt_columns.shape[1] == 5 else pd.DataFrame(columns=["local_segment_label", "local_segment_start_datetime","local_segment_end_datetime"])
|
||||
segment_colums[["local_segment_label", "local_segment_start_datetime", "local_segment_end_datetime"]] = new_segment_columns
|
||||
for i in range(segment_colums.shape[1]):
|
||||
sensor_so_features.insert(1 + i, segment_colums.columns[i], segment_colums[segment_colums.columns[i]])
|
||||
|
||||
return sensor_features
|
||||
return sensor_fo_features, sensor_so_features
|
||||
|
||||
else:
|
||||
segment_colums = pd.DataFrame()
|
||||
sensor_features['local_segment'] = sensor_features['local_segment'].str.replace(r'_RR\d+SS', '')
|
||||
split_segemnt_columns = sensor_features["local_segment"].str.split(pat="(.*)#(.*),(.*)", expand=True)
|
||||
new_segment_columns = split_segemnt_columns.iloc[:,1:4] if split_segemnt_columns.shape[1] == 5 else pd.DataFrame(columns=["local_segment_label", "local_segment_start_datetime","local_segment_end_datetime"])
|
||||
segment_colums[["local_segment_label", "local_segment_start_datetime", "local_segment_end_datetime"]] = new_segment_columns
|
||||
for i in range(segment_colums.shape[1]):
|
||||
sensor_features.insert(1 + i, segment_colums.columns[i], segment_colums[segment_colums.columns[i]])
|
||||
|
||||
return sensor_features
|
||||
|
||||
def run_provider_cleaning_script(provider, provider_key, sensor_key, sensor_data_files):
|
||||
from importlib import import_module, util
|
||||
|
|
Loading…
Reference in New Issue