# File: rapids/src/features/entry.py
#
# Listing metadata from the original file view: 52 lines, 2.3 KiB, Python.


import pandas as pd
from utils.utils import fetch_provider_features, run_provider_cleaning_script
from sklearn.preprocessing import StandardScaler
import sys
# Entry point script for a provider's cleaning / feature-extraction step.
#
# `snakemake` is not defined in this file; it is expected to be injected by
# Snakemake when this file is run through a rule's `script:` directive.
#
# Reads:
#   snakemake.input   - named input files; "time_segments_labels" is required
#                       for every sensor_key other than the two cleaning keys.
#   snakemake.params  - "provider" (mapping of provider settings),
#                       "provider_key" and "sensor_key".
# Writes:
#   snakemake.output[0] - sensor/cleaned features, or the second-order
#                         features when windows are computed.
#   snakemake.output[1] - window features when windows are computed; an empty
#                         CSV for empatica sensors when they are not.

sensor_data_files = dict(snakemake.input)

provider = snakemake.params["provider"]
provider_key = snakemake.params["provider_key"]
sensor_key = snakemake.params["sensor_key"]

# Windows are computed only when the provider has a truthy WINDOWS section
# whose COMPUTE flag is also truthy.
calc_windows = bool(provider.get("WINDOWS", False) and provider["WINDOWS"].get("COMPUTE", False))

if sensor_key in ("all_cleaning_individual", "all_cleaning_overall"):
    # Data cleaning
    sensor_features = run_provider_cleaning_script(provider, provider_key, sensor_key, sensor_data_files)
else:
    # Extract sensor features.
    # The time segments file is passed separately, so take it out of the
    # dict of sensor data files (a copy of snakemake.input, which is left intact).
    del sensor_data_files["time_segments_labels"]
    time_segments_file = snakemake.input["time_segments_labels"]

    if calc_windows:
        window_features, second_order_features = fetch_provider_features(
            provider,
            provider_key,
            sensor_key,
            sensor_data_files,
            time_segments_file,
            calc_windows=True,
        )

        # TODO(review): z-scoring of the window features (StandardScaler,
        # gated by provider["WINDOWS"]["STANDARDIZE_SO_FEATURES"]) was
        # prototyped here as commented-out code and never enabled; the
        # file-level StandardScaler and sys imports were only used by it.

        window_features.to_csv(snakemake.output[1], index=False)
        second_order_features.to_csv(snakemake.output[0], index=False)
    else:
        if "empatica" in sensor_key:
            # Presumably the empatica rules always declare a second output,
            # so an empty placeholder is written when no windows are
            # computed - confirm against the Snakefile.
            pd.DataFrame().to_csv(snakemake.output[1], index=False)

        # Only computed on the non-window path: its result is never written
        # when windows are computed, so running it there would be wasted work.
        sensor_features = fetch_provider_features(
            provider,
            provider_key,
            sensor_key,
            sensor_data_files,
            time_segments_file,
            calc_windows=False,
        )

# On the windowed path both outputs were already written above.
# NOTE(review): this guard also applies to the cleaning branch, so cleaned
# features are written only when calc_windows is false.
if not calc_windows:
    sensor_features.to_csv(snakemake.output[0], index=False)