52 lines
2.3 KiB
Python
52 lines
2.3 KiB
Python
import pandas as pd
|
|
from utils.utils import fetch_provider_features, run_provider_cleaning_script
|
|
from sklearn.preprocessing import StandardScaler
|
|
|
|
import sys
|
|
|
|
# Snakemake entry point: runs either a provider's cleaning script or its
# feature-extraction code for one sensor, then writes the result(s) to the
# rule's output file(s). The `snakemake` object is injected by Snakemake's
# `script:` directive.

# Copy the rule inputs into a plain dict so entries can be removed below.
sensor_data_files = dict(snakemake.input)

provider = snakemake.params["provider"]
provider_key = snakemake.params["provider_key"]
sensor_key = snakemake.params["sensor_key"]

# Windowed features are computed only when the provider has a truthy
# "WINDOWS" section whose "COMPUTE" entry is truthy.
calc_windows = bool((provider.get("WINDOWS") or {}).get("COMPUTE", False))

CLEANING_SENSOR_KEYS = ("all_cleaning_individual", "all_cleaning_overall")

if sensor_key in CLEANING_SENSOR_KEYS:
    # Data cleaning
    # NOTE(review): if a cleaning provider ever enables WINDOWS.COMPUTE, the
    # result is computed here but nothing is written below - confirm that
    # cleaning providers never set it.
    sensor_features = run_provider_cleaning_script(
        provider, provider_key, sensor_key, sensor_data_files
    )
else:
    # Extract sensor features
    # The time-segments file is passed separately, so drop it from the dict
    # of sensor data files.
    del sensor_data_files["time_segments_labels"]
    time_segments_file = snakemake.input["time_segments_labels"]

    if calc_windows:
        # With calc_windows=True the helper's result is unpacked as a pair:
        # (window features, second-order features).
        window_features, second_order_features = fetch_provider_features(
            provider,
            provider_key,
            sensor_key,
            sensor_data_files,
            time_segments_file,
            calc_windows=True,
        )

        # TODO: optional standardization (z-scoring the window feature
        # columns, see provider["WINDOWS"]["STANDARDIZE_SO_FEATURES"]) was
        # prototyped here but is currently disabled.

        window_features.to_csv(snakemake.output[1], index=False)
        second_order_features.to_csv(snakemake.output[0], index=False)
    else:
        if "empatica" in sensor_key:
            # Writes an empty CSV to the second output. Presumably the
            # empatica rules always declare a windows output that must exist
            # even when windows are disabled - confirm against the rules.
            pd.DataFrame().to_csv(snakemake.output[1], index=False)

        # Only compute the plain (non-windowed) features when they are
        # actually going to be written; this avoids a second, discarded
        # extraction pass when windows are enabled.
        sensor_features = fetch_provider_features(
            provider,
            provider_key,
            sensor_key,
            sensor_data_files,
            time_segments_file,
            calc_windows=False,
        )

if not calc_windows:
    sensor_features.to_csv(snakemake.output[0], index=False)