"""Standardize sensor feature files (Snakemake script).

Originally located at ``src/features/standardization/main.py``.

The ``snakemake`` object is not defined in this file; it is expected to be
provided by the Snakemake runtime that executes the script.  The script reads:

* ``snakemake.input`` -- named inputs (``windows_features_data`` for the
  ``"cr"`` provider, ``sensor_features`` otherwise),
* ``snakemake.params`` -- ``provider_key``, ``sensor_key`` and, for the
  ``"cr"`` provider, ``provider_main``,
* ``snakemake.output`` -- destination CSV path(s).

Every column except the segment-description columns is standardized with
``sklearn.preprocessing.StandardScaler`` and the result is written as CSV.
"""
import sys

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Columns that describe the time segment itself; they are never scaled.
SEGMENT_COLUMNS = [
    'local_segment',
    'local_segment_label',
    'local_segment_start_datetime',
    'local_segment_end_datetime',
]


def _standardize_features(data, excluded_columns):
    """Standardize every column of *data* not listed in *excluded_columns*.

    The frame is modified in place and also returned.  It is left untouched
    when it is empty or when it has no column outside *excluded_columns*,
    because ``StandardScaler`` refuses an array with zero samples or zero
    features.
    """
    feature_columns = [
        column for column in data.columns if column not in excluded_columns
    ]
    if data.empty or not feature_columns:
        return data

    # Assigning by column label replaces the columns instead of writing into
    # them, so integer-typed feature columns simply become float columns.
    data[feature_columns] = StandardScaler().fit_transform(data[feature_columns])
    return data


sensor_data_files = dict(snakemake.input)

provider_key = snakemake.params["provider_key"]
sensor_key = snakemake.params["sensor_key"]

pd.set_option('display.max_columns', None)

if provider_key == "cr":
    # The helper module lives outside this package; make it importable.
    sys.path.append('/rapids/src/features/')
    from cr_features_helper_methods import extract_second_order_features

    provider_main = snakemake.params["provider_main"]
    prefix = sensor_key + "_" + provider_key + "_"

    windows_features_data = pd.read_csv(sensor_data_files["windows_features_data"])
    excluded_columns = SEGMENT_COLUMNS + [prefix + "level_1"]

    if windows_features_data.empty:
        # Nothing to scale: both outputs receive the (empty) input frame.
        windows_features_data.to_csv(snakemake.output[1], index=False)
        windows_features_data.to_csv(snakemake.output[0], index=False)
    else:
        windows_features_data = _standardize_features(
            windows_features_data, excluded_columns
        )

        # output[1]: standardized window-level features.
        windows_features_data.to_csv(snakemake.output[1], index=False)

        # output[0]: second-order features computed from the standardized
        # windows, or an empty frame when they are not requested.
        windows_config = provider_main["WINDOWS"]
        if windows_config["COMPUTE"] and "SECOND_ORDER_FEATURES" in windows_config:
            so_features_names = windows_config["SECOND_ORDER_FEATURES"]
            windows_so_features_data = extract_second_order_features(
                windows_features_data, so_features_names, prefix
            )
            windows_so_features_data.to_csv(snakemake.output[0], index=False)
        else:
            pd.DataFrame().to_csv(snakemake.output[0], index=False)

else:
    sensor_features_files = sensor_data_files["sensor_features"]
    # A single path may arrive as a plain string; wrap it so the loop below
    # iterates over paths rather than over characters.
    if isinstance(sensor_features_files, str):
        sensor_features_files = [sensor_features_files]

    file_suffix = "/" + sensor_key + ".csv"
    for sensor_features in sensor_features_files:
        if file_suffix in sensor_features:
            sensor_data = _standardize_features(
                pd.read_csv(sensor_features), SEGMENT_COLUMNS
            )
            sensor_data.to_csv(snakemake.output[0], index=False)
            # Only the first matching file is processed.
            break
    else:
        # Fail loudly instead of finishing without producing the output file.
        raise ValueError(
            "No feature file matching '" + file_suffix + "' found in: "
            + ", ".join(str(path) for path in sensor_features_files)
        )