Implementation of the second order features.

sociality-task
Primoz 2022-04-25 13:07:03 +00:00
parent 66451160e9
commit 5638367999
12 changed files with 147 additions and 48 deletions

View File

@ -493,6 +493,7 @@ EMPATICA_ACCELEROMETER:
WINDOWS:
COMPUTE: True
WINDOW_LENGTH: 10 # specify window length in seconds
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'max', 'min']
SRC_SCRIPT: src/features/empatica_accelerometer/cr/main.py
@ -510,11 +511,11 @@ EMPATICA_TEMPERATURE:
CONTAINER: TEMP
PROVIDERS:
DBDP:
COMPUTE: False
COMPUTE: True
FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"]
SRC_SCRIPT: src/features/empatica_temperature/dbdp/main.py
CR:
COMPUTE: False
COMPUTE: True
FEATURES: ["autocorrelations", "countAboveMean", "countBelowMean", "maximum", "minimum", "meanAbsChange", "longestStrikeAboveMean",
"longestStrikeBelowMean", "stdDev", "median", "meanChange", "numberOfZeroCrossings", "absEnergy", "linearTrendSlope",
"ratioBeyondRSigma", "binnedEntropy", "numOfPeaksAutocorr", "numberOfZeroCrossingsAutocorr", "areaAutocorr",
@ -523,6 +524,7 @@ EMPATICA_TEMPERATURE:
WINDOWS:
COMPUTE: True
WINDOW_LENGTH: 90 # specify window length in seconds
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'max', 'min']
SRC_SCRIPT: src/features/empatica_temperature/cr/main.py
# See https://www.rapids.science/latest/features/empatica-electrodermal-activity/
@ -530,11 +532,11 @@ EMPATICA_ELECTRODERMAL_ACTIVITY:
CONTAINER: EDA
PROVIDERS:
DBDP:
COMPUTE: False
COMPUTE: True
FEATURES: ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", "diffminmodeeda", "entropyeda"]
SRC_SCRIPT: src/features/empatica_electrodermal_activity/dbdp/main.py
CR:
COMPUTE: False
COMPUTE: True
FEATURES: ['mean', 'std', 'q25', 'q75', 'qd', 'deriv', 'power', 'numPeaks', 'ratePeaks', 'powerPeaks', 'sumPosDeriv', 'propPosDeriv', 'derivTonic',
'sigTonicDifference', 'freqFeats','maxPeakAmplitudeChangeBefore', 'maxPeakAmplitudeChangeAfter', 'avgPeakAmplitudeChangeBefore',
'avgPeakAmplitudeChangeAfter', 'avgPeakChangeRatio', 'maxPeakIncreaseTime', 'maxPeakDecreaseTime', 'maxPeakDuration', 'maxPeakChangeRatio',
@ -543,6 +545,7 @@ EMPATICA_ELECTRODERMAL_ACTIVITY:
WINDOWS:
COMPUTE: True
WINDOW_LENGTH: 80 # specify window length in seconds
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'max', 'min']
SRC_SCRIPT: src/features/empatica_electrodermal_activity/cr/main.py
# See https://www.rapids.science/latest/features/empatica-blood-volume-pulse/
@ -550,16 +553,17 @@ EMPATICA_BLOOD_VOLUME_PULSE:
CONTAINER: BVP
PROVIDERS:
DBDP:
COMPUTE: False
COMPUTE: True
FEATURES: ["maxbvp", "minbvp", "avgbvp", "medianbvp", "modebvp", "stdbvp", "diffmaxmodebvp", "diffminmodebvp", "entropybvp"]
SRC_SCRIPT: src/features/empatica_blood_volume_pulse/dbdp/main.py
CR:
COMPUTE: False
COMPUTE: True
FEATURES: ['meanHr', 'ibi', 'sdnn', 'sdsd', 'rmssd', 'pnn20', 'pnn50', 'sd', 'sd2', 'sd1/sd2', 'numRR', # Time features
'VLF', 'LF', 'LFnorm', 'HF', 'HFnorm', 'LF/HF', 'fullIntegral'] # Freq features
WINDOWS:
COMPUTE: True
WINDOW_LENGTH: 10 # specify window length in seconds
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'max', 'min']
SRC_SCRIPT: src/features/empatica_blood_volume_pulse/cr/main.py
# See https://www.rapids.science/latest/features/empatica-inter-beat-interval/
@ -576,7 +580,8 @@ EMPATICA_INTER_BEAT_INTERVAL:
'VLF', 'LF', 'LFnorm', 'HF', 'HFnorm', 'LF/HF', 'fullIntegral'] # Freq features
WINDOWS:
COMPUTE: True
WINDOW_LENGTH: 120 # specify window length in seconds
WINDOW_LENGTH: 2000 # specify window length in seconds
SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'max', 'min']
SRC_SCRIPT: src/features/empatica_inter_beat_interval/cr/main.py
# See https://www.rapids.science/latest/features/empatica-tags/

View File

@ -110,12 +110,13 @@ dependencies:
- bidict==0.22.0
- biosppy==0.8.0
- cached-property==1.5.2
- calculatingfeatures==0.1.1
- configargparse==0.15.1
- cr-features==0.1.8
- cycler==0.11.0
- decorator==4.4.2
- fonttools==4.31.2
- fonttools==4.33.2
- h5py==3.6.0
- hmmlearn==0.2.7
- ipython-genutils==0.2.0
- jupyter-core==4.6.3
- kiwisolver==1.4.2
@ -124,7 +125,7 @@ dependencies:
- opencv-python==4.5.5.64
- packaging==21.3
- peakutils==1.3.3
- pillow==9.0.1
- pillow==9.1.0
- pulp==2.4
- pyparsing==2.4.7
- pyrsistent==0.15.5
@ -134,5 +135,5 @@ dependencies:
- snakemake==5.30.2
- toposort==1.5
- traitlets==4.3.3
- typing-extensions==4.1.1
- typing-extensions==4.2.0
prefix: /opt/conda/envs/rapids

View File

@ -771,7 +771,8 @@ rule empatica_accelerometer_python_features:
sensor_key = "empatica_accelerometer"
output:
"data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_python_{provider_key}.csv",
"data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_python_{provider_key}_windows.csv"
"data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_python_{provider_key}_windows.csv",
"data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_python_{provider_key}_windows_SO_features.csv"
script:
"../src/features/entry.py"
@ -798,7 +799,8 @@ rule empatica_heartrate_python_features:
sensor_key = "empatica_heartrate"
output:
"data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_python_{provider_key}.csv",
"data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_python_{provider_key}_windows.csv"
"data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_python_{provider_key}_windows.csv",
"data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_python_{provider_key}_windows_SO_features.csv"
script:
"../src/features/entry.py"
@ -825,7 +827,8 @@ rule empatica_temperature_python_features:
sensor_key = "empatica_temperature"
output:
"data/interim/{pid}/empatica_temperature_features/empatica_temperature_python_{provider_key}.csv",
"data/interim/{pid}/empatica_temperature_features/empatica_temperature_python_{provider_key}_windows.csv"
"data/interim/{pid}/empatica_temperature_features/empatica_temperature_python_{provider_key}_windows.csv",
"data/interim/{pid}/empatica_temperature_features/empatica_temperature_python_{provider_key}_windows_SO_features.csv"
script:
"../src/features/entry.py"
@ -852,7 +855,8 @@ rule empatica_electrodermal_activity_python_features:
sensor_key = "empatica_electrodermal_activity"
output:
"data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_{provider_key}.csv",
"data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_{provider_key}_windows.csv"
"data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_{provider_key}_windows.csv",
"data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_{provider_key}_windows_SO_features.csv"
script:
"../src/features/entry.py"
@ -879,7 +883,8 @@ rule empatica_blood_volume_pulse_python_features:
sensor_key = "empatica_blood_volume_pulse"
output:
"data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}.csv",
"data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}_windows.csv"
"data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}_windows.csv",
"data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}_windows_SO_features.csv"
script:
"../src/features/entry.py"
@ -906,7 +911,8 @@ rule empatica_inter_beat_interval_python_features:
sensor_key = "empatica_inter_beat_interval"
output:
"data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_{provider_key}.csv",
"data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_{provider_key}_windows.csv"
"data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_{provider_key}_windows.csv",
"data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_{provider_key}_windows_SO_features.csv"
script:
"../src/features/entry.py"

View File

View File

@ -0,0 +1,30 @@
import pandas as pd
def extract_second_order_features(intraday_features, so_features_names):
    """Aggregate windowed (first-order) features into per-segment statistics.

    The "level_1" column is dropped, the remaining columns are grouped by
    "local_segment", and each requested statistic is computed for every
    remaining column. Result columns are named "<column>_SO_<statistic>".

    Parameters
    ----------
    intraday_features : pandas.DataFrame
        Windowed features. When non-empty it must contain the columns
        "local_segment" and "level_1".
    so_features_names : collection of str
        Statistics to compute; recognised names are "mean", "median", "sd",
        "max" and "min". Unrecognised names are ignored.

    Returns
    -------
    pandas.DataFrame
        One row per "local_segment" with a "local_segment" column followed by
        the requested statistics in the order mean, median, sd, max, min.
        If the input is empty, or no recognised statistic is requested, an
        empty frame holding only the "local_segment" column is returned.
    """
    if intraday_features.empty:
        return pd.DataFrame(columns=["local_segment"])

    # Build the grouping once instead of re-dropping/re-grouping per statistic.
    grouped = intraday_features.drop("level_1", axis=1).groupby(["local_segment"])

    # (config name, GroupBy method) pairs; the order fixes the column order.
    aggregations = (
        ("mean", "mean"),
        ("median", "median"),  # previously computed with .mean() by mistake
        ("sd", "std"),
        ("max", "max"),
        ("min", "min"),
    )
    parts = [
        getattr(grouped, method)().add_suffix("_SO_" + name)
        for name, method in aggregations
        if name in so_features_names
    ]

    if not parts:
        # Nothing requested: keep the "local_segment" contract callers check for.
        return pd.DataFrame(columns=["local_segment"])

    so_features = pd.concat(parts, axis=1)
    # Turn the "local_segment" group index back into a regular column.
    so_features.reset_index(inplace=True)
    return so_features
def get_sample_rate(data):
    """Return the mean sample rate of ``data`` as an integer.

    The rate is computed as 1000 divided by the mean difference between
    consecutive values of the "timestamp" column, truncated to an int
    (assumes timestamps are in milliseconds, giving Hz -- TODO confirm).

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide a "timestamp" column supporting ``diff()``.

    Raises
    ------
    Exception
        If the mean timestamp difference cannot be computed; the original
        error is attached as ``__cause__``.
    """
    try:
        timestamps_diff = data['timestamp'].diff().dropna().mean()
    except Exception as err:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt/SystemExit propagate, and keep the root cause.
        raise Exception("Error occured while trying to get the mean sample rate from the data.") from err
    return int(1000/timestamps_diff)

View File

@ -3,6 +3,7 @@ from scipy.stats import entropy
from cr_features.helper_functions import convert_to2d, accelerometer_features, frequency_features
from cr_features.calculate_features import calculate_features
from cr_features_helper_methods import get_sample_rate, extract_second_order_features
import sys
@ -69,4 +70,9 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen
acc_intraday_features = extract_acc_features_from_intraday_data(acc_intraday_data, intraday_features_to_compute,
requested_window_length, time_segment, filter_data_by_segment)
if calc_windows:
so_features_names = provider["WINDOWS"]["SECOND_ORDER_FEATURES"]
acc_second_order_features = extract_second_order_features(acc_intraday_features, so_features_names)
return acc_intraday_features, acc_second_order_features
return acc_intraday_features

View File

@ -3,6 +3,7 @@ from scipy.stats import entropy
from cr_features.helper_functions import convert_to2d, hrv_features, hrv_freq_features
from cr_features.hrv import extract_hrv_features_2d_wrapper
from cr_features_helper_methods import get_sample_rate, extract_second_order_features
import sys
@ -74,4 +75,9 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen
bvp_intraday_features = extract_bvp_features_from_intraday_data(bvp_intraday_data, intraday_features_to_compute,
requested_window_length, time_segment, filter_data_by_segment)
if calc_windows:
so_features_names = provider["WINDOWS"]["SECOND_ORDER_FEATURES"]
bvp_second_order_features = extract_second_order_features(bvp_intraday_features, so_features_names)
return bvp_intraday_features, bvp_second_order_features
return bvp_intraday_features

View File

@ -3,6 +3,7 @@ from scipy.stats import entropy
from cr_features.helper_functions import convert_to2d, gsr_features
from cr_features.calculate_features import calculate_features
from cr_features_helper_methods import get_sample_rate, extract_second_order_features
def getSampleRate(data):
@ -62,4 +63,9 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen
eda_intraday_features = extract_eda_features_from_intraday_data(eda_intraday_data, intraday_features_to_compute,
requested_window_length, time_segment, filter_data_by_segment)
if calc_windows:
so_features_names = provider["WINDOWS"]["SECOND_ORDER_FEATURES"]
eda_second_order_features = extract_second_order_features(eda_intraday_features, so_features_names)
return eda_intraday_features, eda_second_order_features
return eda_intraday_features

View File

@ -3,6 +3,7 @@ import numpy as np
from cr_features.helper_functions import convert_ibi_to2d_time, hrv_features, hrv_freq_features
from cr_features.hrv import extract_hrv_features_2d_wrapper
from cr_features_helper_methods import get_sample_rate, extract_second_order_features
import math
import sys
@ -11,14 +12,6 @@ pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', None)
def get_sample_rate(data):
    """Return the mean sample rate of ``data`` as an integer.

    The rate is computed as 1000 divided by the mean difference between
    consecutive values of the "timestamp" column, truncated to an int
    (assumes timestamps are in milliseconds, giving Hz -- TODO confirm).

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide a "timestamp" column supporting ``diff()``.

    Raises
    ------
    Exception
        If the mean timestamp difference cannot be computed; the original
        error is attached as ``__cause__``.
    """
    try:
        timestamps_diff = data['timestamp'].diff().dropna().mean()
    except Exception as err:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt/SystemExit propagate, and keep the root cause.
        raise Exception("Error occured while trying to get the mean sample rate from the data.") from err
    return int(1000/timestamps_diff)
def extract_ibi_features_from_intraday_data(ibi_intraday_data, features, window_length, time_segment, filter_data_by_segment):
ibi_intraday_features = pd.DataFrame(columns=["local_segment"] + features)
@ -30,13 +23,7 @@ def extract_ibi_features_from_intraday_data(ibi_intraday_data, features, window_
if not ibi_intraday_data.empty:
ibi_intraday_features = pd.DataFrame()
# np.set_printoptions(threshold=sys.maxsize)
# print(ibi_intraday_data.groupby('local_segment').apply(lambda x: math.ceil(x['timings'].iloc[-1])))
# nekaj = ibi_intraday_data.groupby('local_segment').apply(lambda x: \
# convert_ibi_to2d_time(x[['timings', 'inter_beat_interval']], window_length)[1])
# sys.exit()
# apply methods from calculate features module
if window_length is None:
ibi_intraday_features = \
@ -82,5 +69,10 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen
# extract features from intraday data
ibi_intraday_features = extract_ibi_features_from_intraday_data(ibi_intraday_data, intraday_features_to_compute,
requested_window_length, time_segment, filter_data_by_segment)
if calc_windows:
so_features_names = provider["WINDOWS"]["SECOND_ORDER_FEATURES"]
ibi_second_order_features = extract_second_order_features(ibi_intraday_features, so_features_names)
return ibi_intraday_features, ibi_second_order_features
return ibi_intraday_features

View File

@ -3,6 +3,7 @@ from scipy.stats import entropy
from cr_features.helper_functions import convert_to2d, generic_features
from cr_features.calculate_features import calculate_features
from cr_features_helper_methods import get_sample_rate, extract_second_order_features
import sys
@ -61,4 +62,10 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen
# extract features from intraday data
temperature_intraday_features = extract_temp_features_from_intraday_data(temperature_intraday_data, intraday_features_to_compute,
requested_window_length, time_segment, filter_data_by_segment)
if calc_windows:
so_features_names = provider["WINDOWS"]["SECOND_ORDER_FEATURES"]
temperature_second_order_features = extract_second_order_features(temperature_intraday_features, so_features_names)
return temperature_intraday_features, temperature_second_order_features
return temperature_intraday_features

View File

@ -22,13 +22,17 @@ else:
del sensor_data_files["time_segments_labels"]
time_segments_file = snakemake.input["time_segments_labels"]
sensor_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=calc_windows)
# Calculation over multiple windows in case of Empatica's CR-features
if calc_windows:
sensor_features.to_csv(snakemake.output[1], index=False)
sensor_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=False)
first_order_features, second_order_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=calc_windows)
first_order_features.to_csv(snakemake.output[1], index=False)
second_order_features.to_csv(snakemake.output[2], index=False)
calc_windows = False
elif "empatica" in sensor_key:
pd.DataFrame().to_csv(snakemake.output[1], index=False)
pd.DataFrame().to_csv(snakemake.output[2], index=False)
sensor_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=calc_windows)
sensor_features.to_csv(snakemake.output[0], index=False)

View File

@ -93,6 +93,8 @@ def fetch_provider_features(provider, provider_key, sensor_key, sensor_data_file
from importlib import import_module, util
sensor_features = pd.DataFrame(columns=["local_segment"])
sensor_fo_features = pd.DataFrame(columns=["local_segment"])
sensor_so_features = pd.DataFrame(columns=["local_segment"])
time_segments_labels = pd.read_csv(time_segments_file, header=0)
if "FEATURES" not in provider:
raise ValueError("Provider config[{}][PROVIDERS][{}] is missing a FEATURES attribute in config.yaml".format(sensor_key.upper(), provider_key.upper()))
@ -106,23 +108,57 @@ def fetch_provider_features(provider, provider_key, sensor_key, sensor_data_file
time_segments_labels["label"] = [""]
for time_segment in time_segments_labels["label"]:
print("{} Processing {} {} {}".format(rapids_log_tag, sensor_key, provider_key, time_segment))
features = feature_function(sensor_data_files, time_segment, provider, filter_data_by_segment=filter_data_by_segment, chunk_episodes=chunk_episodes, calc_windows=calc_windows)
if not "local_segment" in features.columns:
raise ValueError("The dataframe returned by the " + sensor_key + " provider '" + provider_key + "' is missing the 'local_segment' column added by the 'filter_data_by_segment()' function. Check the provider script is using such function and is not removing 'local_segment' by accident (" + provider["SRC_SCRIPT"] + ")\n The 'local_segment' column is used to index a provider's features (each row corresponds to a different time segment instance (e.g. 2020-01-01, 2020-01-02, 2020-01-03, etc.)")
features.columns = ["{}{}".format("" if col.startswith("local_segment") else (sensor_key + "_"+ provider_key + "_"), col) for col in features.columns]
sensor_features = pd.concat([sensor_features, features], axis=0, sort=False)
# When calc_windows is True the provider returns a tuple: (first-order features, second-order features)
if isinstance(features, tuple):
if not "local_segment" in features[0].columns or not "local_segment" in features[1].columns:
raise ValueError("The dataframe returned by the " + sensor_key + " provider '" + provider_key + "' is missing the 'local_segment' column added by the 'filter_data_by_segment()' function. Check the provider script is using such function and is not removing 'local_segment' by accident (" + provider["SRC_SCRIPT"] + ")\n The 'local_segment' column is used to index a provider's features (each row corresponds to a different time segment instance (e.g. 2020-01-01, 2020-01-02, 2020-01-03, etc.)")
features[0].columns = ["{}{}".format("" if col.startswith("local_segment") else (sensor_key + "_"+ provider_key + "_"), col) for col in features[0].columns]
features[1].columns = ["{}{}".format("" if col.startswith("local_segment") else (sensor_key + "_"+ provider_key + "_"), col) for col in features[1].columns]
if not features[0].empty:
sensor_fo_features = pd.concat([sensor_features, features[0]], axis=0, sort=False)
if not features[1].empty:
sensor_so_features = pd.concat([sensor_features, features[1]], axis=0, sort=False)
else:
if not "local_segment" in features.columns:
raise ValueError("The dataframe returned by the " + sensor_key + " provider '" + provider_key + "' is missing the 'local_segment' column added by the 'filter_data_by_segment()' function. Check the provider script is using such function and is not removing 'local_segment' by accident (" + provider["SRC_SCRIPT"] + ")\n The 'local_segment' column is used to index a provider's features (each row corresponds to a different time segment instance (e.g. 2020-01-01, 2020-01-02, 2020-01-03, etc.)")
features.columns = ["{}{}".format("" if col.startswith("local_segment") else (sensor_key + "_"+ provider_key + "_"), col) for col in features.columns]
sensor_features = pd.concat([sensor_features, features], axis=0, sort=False)
else:
for feature in provider["FEATURES"]:
sensor_features[feature] = None
segment_colums = pd.DataFrame()
sensor_features['local_segment'] = sensor_features['local_segment'].str.replace(r'_RR\d+SS', '')
split_segemnt_columns = sensor_features["local_segment"].str.split(pat="(.*)#(.*),(.*)", expand=True)
new_segment_columns = split_segemnt_columns.iloc[:,1:4] if split_segemnt_columns.shape[1] == 5 else pd.DataFrame(columns=["local_segment_label", "local_segment_start_datetime","local_segment_end_datetime"])
segment_colums[["local_segment_label", "local_segment_start_datetime", "local_segment_end_datetime"]] = new_segment_columns
for i in range(segment_colums.shape[1]):
sensor_features.insert(1 + i, segment_colums.columns[i], segment_colums[segment_colums.columns[i]])
if calc_windows:
segment_colums = pd.DataFrame()
sensor_fo_features['local_segment'] = sensor_fo_features['local_segment'].str.replace(r'_RR\d+SS', '')
split_segemnt_columns = sensor_fo_features["local_segment"].str.split(pat="(.*)#(.*),(.*)", expand=True)
new_segment_columns = split_segemnt_columns.iloc[:,1:4] if split_segemnt_columns.shape[1] == 5 else pd.DataFrame(columns=["local_segment_label", "local_segment_start_datetime","local_segment_end_datetime"])
segment_colums[["local_segment_label", "local_segment_start_datetime", "local_segment_end_datetime"]] = new_segment_columns
for i in range(segment_colums.shape[1]):
sensor_fo_features.insert(1 + i, segment_colums.columns[i], segment_colums[segment_colums.columns[i]])
segment_colums = pd.DataFrame()
sensor_so_features['local_segment'] = sensor_so_features['local_segment'].str.replace(r'_RR\d+SS', '')
split_segemnt_columns = sensor_so_features["local_segment"].str.split(pat="(.*)#(.*),(.*)", expand=True)
new_segment_columns = split_segemnt_columns.iloc[:,1:4] if split_segemnt_columns.shape[1] == 5 else pd.DataFrame(columns=["local_segment_label", "local_segment_start_datetime","local_segment_end_datetime"])
segment_colums[["local_segment_label", "local_segment_start_datetime", "local_segment_end_datetime"]] = new_segment_columns
for i in range(segment_colums.shape[1]):
sensor_so_features.insert(1 + i, segment_colums.columns[i], segment_colums[segment_colums.columns[i]])
return sensor_features
return sensor_fo_features, sensor_so_features
else:
segment_colums = pd.DataFrame()
sensor_features['local_segment'] = sensor_features['local_segment'].str.replace(r'_RR\d+SS', '')
split_segemnt_columns = sensor_features["local_segment"].str.split(pat="(.*)#(.*),(.*)", expand=True)
new_segment_columns = split_segemnt_columns.iloc[:,1:4] if split_segemnt_columns.shape[1] == 5 else pd.DataFrame(columns=["local_segment_label", "local_segment_start_datetime","local_segment_end_datetime"])
segment_colums[["local_segment_label", "local_segment_start_datetime", "local_segment_end_datetime"]] = new_segment_columns
for i in range(segment_colums.shape[1]):
sensor_features.insert(1 + i, segment_colums.columns[i], segment_colums[segment_colums.columns[i]])
return sensor_features
def run_provider_cleaning_script(provider, provider_key, sensor_key, sensor_data_files):
from importlib import import_module, util