2022-06-13 16:12:03 +02:00
|
|
|
import pandas as pd
|
|
|
|
import numpy as np
|
|
|
|
from sklearn.preprocessing import StandardScaler
|
|
|
|
|
|
|
|
import sys
|
|
|
|
|
|
|
|
# NOTE(review): `snakemake` is never defined or imported in this file; it is
# presumably injected by Snakemake when the file is run through a rule's
# `script:` directive -- confirm against the calling rule.

# Named rule inputs, copied into a plain dict so they can be looked up by key.
sensor_data_files = dict(snakemake.input)

# Rule parameters, read in the same order as before:
# provider, then provider_key, then sensor_key.
provider, provider_key, sensor_key = (
    snakemake.params[param_name]
    for param_name in ("provider", "provider_key", "sensor_key")
)

# Lift pandas' limit on the number of columns shown when a frame is printed.
pd.set_option("display.max_columns", None)
|
|
|
|
|
|
|
|
def _standardize_features(frame, excluded_columns):
    """Scale, in place, every column of *frame* not listed in *excluded_columns*.

    The selected columns are overwritten with the output of a freshly fitted
    scikit-learn ``StandardScaler`` created with its default settings.
    """
    feature_mask = ~frame.columns.isin(excluded_columns)
    frame.loc[:, feature_mask] = StandardScaler().fit_transform(frame.loc[:, feature_mask])


if provider_key == "cr":
    # The helper module is imported from this directory, so it is put on the
    # module search path first.
    sys.path.append('/rapids/src/features/')
    from cr_features_helper_methods import extract_second_order_features

    provider_main = snakemake.params["provider_main"]
    prefix = sensor_key + "_" + provider_key + "_"

    windows_features_data = pd.read_csv(sensor_data_files["windows_features_data"])
    # Segment bookkeeping columns plus the prefixed "level_1" column are left
    # unscaled.
    excluded_columns = [
        'local_segment',
        'local_segment_label',
        'local_segment_start_datetime',
        'local_segment_end_datetime',
        prefix + "level_1",
    ]

    # Decide once, before scaling, whether there is anything to work on.
    has_rows = not windows_features_data.empty
    if has_rows:
        _standardize_features(windows_features_data, excluded_columns)

    # output[1] always receives the window-level table: scaled when it has
    # data, written back unchanged when it is empty.
    windows_features_data.to_csv(snakemake.output[1], index=False)

    if not has_rows:
        # Empty input: the same empty table is also written to output[0].
        windows_features_data.to_csv(snakemake.output[0], index=False)
    elif provider_main["WINDOWS"]["COMPUTE"] and "SECOND_ORDER_FEATURES" in provider_main["WINDOWS"]:
        # Second-order features are derived from the already scaled windows.
        so_features_names = provider_main["WINDOWS"]["SECOND_ORDER_FEATURES"]
        windows_so_features_data = extract_second_order_features(
            windows_features_data, so_features_names, prefix
        )
        windows_so_features_data.to_csv(snakemake.output[0], index=False)
    else:
        # Second-order features not requested: output[0] is written from a
        # completely empty DataFrame.
        pd.DataFrame().to_csv(snakemake.output[0], index=False)

else:
    # Only the first input path containing "/<sensor_key>.csv" is processed.
    # If no path matches, nothing is written to output[0].
    for sensor_features in sensor_data_files["sensor_features"]:
        if "/" + sensor_key + ".csv" not in sensor_features:
            continue

        sensor_data = pd.read_csv(sensor_features)
        excluded_columns = [
            'local_segment',
            'local_segment_label',
            'local_segment_start_datetime',
            'local_segment_end_datetime',
        ]

        # An empty table is passed through untouched.
        if not sensor_data.empty:
            _standardize_features(sensor_data, excluded_columns)

        sensor_data.to_csv(snakemake.output[0], index=False)
        break
|