"""Snakemake entry point for RAPIDS-style sensor feature extraction.

Runtime contract (provided by Snakemake, which injects `snakemake` into this
script's globals):
  - snakemake.input: per-sensor data files plus a `time_segments_labels` entry.
  - snakemake.params: `provider` (dict config), `provider_key`, `sensor_key`.
  - snakemake.output[0]: main features CSV (or second-order features when
    windows are computed); snakemake.output[1]: window-level features CSV.
"""
import pandas as pd
from utils.utils import fetch_provider_features, run_provider_cleaning_script
from sklearn.preprocessing import StandardScaler
import sys

sensor_data_files = dict(snakemake.input)

provider = snakemake.params["provider"]
provider_key = snakemake.params["provider_key"]
sensor_key = snakemake.params["sensor_key"]

# Windowed extraction is enabled only when the provider config carries a
# truthy WINDOWS section with COMPUTE enabled.
calc_windows = bool(provider.get("WINDOWS", False) and provider["WINDOWS"].get("COMPUTE", False))

if sensor_key in ("all_cleaning_individual", "all_cleaning_overall"):
    # Data cleaning
    sensor_features = run_provider_cleaning_script(provider, provider_key, sensor_key, sensor_data_files)
else:
    # Extract sensor features.
    # `time_segments_labels` is segment metadata, not sensor data: remove it
    # from the per-sensor input map and pass its path separately.
    del sensor_data_files["time_segments_labels"]
    time_segments_file = snakemake.input["time_segments_labels"]

    if calc_windows:
        window_features, second_order_features = fetch_provider_features(
            provider, provider_key, sensor_key, sensor_data_files,
            time_segments_file, calc_windows=True)

        # Z-score SO (second-order) features by columns. The first 4 columns
        # are skipped — presumably identifier/segment columns, not features;
        # TODO(review): confirm against fetch_provider_features' output schema.
        if provider["WINDOWS"].get("STANDARDIZE_SO_FEATURES", False):
            so_cols = second_order_features.columns[4:]
            second_order_features[so_cols] = StandardScaler().fit_transform(second_order_features[so_cols])

        window_features.to_csv(snakemake.output[1], index=False)
        second_order_features.to_csv(snakemake.output[0], index=False)
    elif "empatica" in sensor_key:
        # Empatica rules declare a window-level output even when windows are
        # disabled; write an empty placeholder so Snakemake finds the file.
        pd.DataFrame().to_csv(snakemake.output[1], index=False)

    if not calc_windows:
        # FIX: guard the non-windowed extraction with `if not calc_windows`
        # at this level so that (a) it runs for EVERY non-windowed sensor,
        # not only empatica ones — otherwise `sensor_features` is unbound at
        # the final write and raises NameError — and (b) it is skipped when
        # windowed features were already computed and written above.
        sensor_features = fetch_provider_features(
            provider, provider_key, sensor_key, sensor_data_files,
            time_segments_file, calc_windows=False)

if not calc_windows:
    sensor_features.to_csv(snakemake.output[0], index=False)