"""Standardize sensor feature columns and write the results as CSV.

This script relies on a global ``snakemake`` object for its inputs, params and
outputs; it is not defined in this file.

* ``provider_key == "cr"``: reads ``input["windows_features_data"]``, scales
  every column except the segment columns and ``<prefix>level_1``, and writes
  the scaled table to ``output[1]``. ``output[0]`` receives the second-order
  features when ``provider_main["WINDOWS"]`` enables them, otherwise an empty
  frame. If the input table is empty it is written unchanged to both outputs.
* any other provider: picks the first path in ``input["sensor_features"]``
  containing ``"/<sensor_key>.csv"``, scales every column except the segment
  columns, and writes the table to ``output[0]``.
"""
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

import sys

# Segment bookkeeping columns that are never passed to the scaler.
SEGMENT_COLUMNS = [
    'local_segment',
    'local_segment_label',
    'local_segment_start_datetime',
    'local_segment_end_datetime',
]


def _standardize_features(data, excluded_columns):
    """Scale all columns of ``data`` not listed in ``excluded_columns``.

    The frame is modified in place and also returned. It is returned untouched
    when it has no rows or when every column is excluded, because
    ``StandardScaler`` has nothing to fit in either case.
    """
    feature_columns = data.columns[~data.columns.isin(excluded_columns)]
    if data.empty or len(feature_columns) == 0:
        return data

    # Assigning through ``data[columns]`` replaces the columns outright, so
    # integer-typed feature columns can take the float results without relying
    # on the implicit upcasting of ``.loc`` assignment, which newer pandas
    # releases deprecate.
    data[feature_columns] = StandardScaler().fit_transform(data[feature_columns])
    return data


sensor_data_files = dict(snakemake.input)

# NOTE(review): `provider` is read but not used anywhere in the visible code.
provider = snakemake.params["provider"]
provider_key = snakemake.params["provider_key"]
sensor_key = snakemake.params["sensor_key"]

pd.set_option('display.max_columns', None)

if provider_key == "cr":
    # The helper module is resolved from this hard-coded directory, so the
    # import has to stay after the path is registered.
    sys.path.append('/rapids/src/features/')
    from cr_features_helper_methods import extract_second_order_features

    provider_main = snakemake.params["provider_main"]
    prefix = sensor_key + "_" + provider_key + "_"

    windows_features_data = pd.read_csv(sensor_data_files["windows_features_data"])
    excluded_columns = SEGMENT_COLUMNS + [prefix + "level_1"]

    if windows_features_data.empty:
        # Nothing to scale: forward the empty table to both outputs.
        windows_features_data.to_csv(snakemake.output[1], index=False)
        windows_features_data.to_csv(snakemake.output[0], index=False)
    else:
        windows_features_data = _standardize_features(windows_features_data, excluded_columns)
        windows_features_data.to_csv(snakemake.output[1], index=False)

        windows_config = provider_main["WINDOWS"]
        if windows_config["COMPUTE"] and "SECOND_ORDER_FEATURES" in windows_config:
            so_features_names = windows_config["SECOND_ORDER_FEATURES"]
            windows_so_features_data = extract_second_order_features(windows_features_data, so_features_names, prefix)
            windows_so_features_data.to_csv(snakemake.output[0], index=False)
        else:
            # Second-order features are not requested: still produce the file.
            pd.DataFrame().to_csv(snakemake.output[0], index=False)
else:
    # NOTE(review): if no path matches, nothing is written to output[0].
    for sensor_features in sensor_data_files["sensor_features"]:
        if "/" + sensor_key + ".csv" in sensor_features:
            sensor_data = pd.read_csv(sensor_features)
            sensor_data = _standardize_features(sensor_data, SEGMENT_COLUMNS)
            sensor_data.to_csv(snakemake.output[0], index=False)
            # Only the first matching file is processed.
            break