"""Inspect EDA window features for windows with and without detected peaks.

The script loads one participant's electrodermal-activity feature file, finds
the feature columns that contain NaN in windows where ``numPeaks`` is 0, and
then looks at those same columns in windows where ``numPeaks`` is not 0:

* prints the rows whose ``maxPeakAmplitudeChangeBefore`` is not 0,
* saves a heatmap of the sign (-1 / 0 / 1) of every value,
* prints which of the columns never contain a 0 and which ones do.
"""
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from itertools import compress

# Alternative inputs; swap the active ``path`` below to inspect another sensor.
# path = "/rapids/data/processed/features/all_participants/all_sensor_features.csv"  # all features all participants
# path = "/rapids/data/interim/p03/empatica_accelerometer_features/empatica_accelerometer_python_cr_windows.csv"
path = "/rapids/data/interim/p01/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_cr_windows.csv"
# path = "/rapids/data/interim/p02/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_cr_windows.csv"
# path = "/rapids/data/interim/p02/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_cr_windows.csv"
# path = "/rapids/data/interim/p02/empatica_temperature_features/empatica_temperature_python_cr_windows.csv"

# Must be set before any DataFrame is printed, otherwise wide frames are
# truncated in the output.
pd.set_option('display.max_columns', None)

df = pd.read_csv(path)

# Windows without any detected peak, and the columns that hold NaN in them.
df_num_peaks_zero = df[df["empatica_electrodermal_activity_cr_numPeaks"] == 0]
columns_num_peaks_zero = df_num_peaks_zero.columns[df_num_peaks_zero.isna().any()].tolist()

# Windows with at least one peak (numPeaks != 0), restricted to those columns.
df_num_peaks_non_zero = df[df["empatica_electrodermal_activity_cr_numPeaks"] != 0]
df_num_peaks_non_zero = df_num_peaks_non_zero[columns_num_peaks_zero]

print(df_num_peaks_non_zero[df_num_peaks_non_zero["empatica_electrodermal_activity_cr_maxPeakAmplitudeChangeBefore"] != 0])

# Quantise every value into a sign class using left-closed bins:
#   [-1, 0) -> -1,  [0, 1e-12) -> 0,  [1e-12, 1000) -> 1.
# Values outside [-1, 1000) and NaN stay NaN.
# The frame is built in one go instead of inserting column by column, which
# avoids fragmenting the DataFrame when there are many columns.
df_q = pd.DataFrame({
    col: pd.to_numeric(
        pd.cut(
            df_num_peaks_non_zero[col],
            bins=[-1, 0, 0.000000000001, 1000],
            labels=[-1, 0, 1],
            right=False,
        )
    )
    for col in df_num_peaks_non_zero
})

sns.heatmap(df_q)
plt.savefig('eda_windows_p01_window_values_non_zero_peak_distribution_0thresh.png', bbox_inches='tight')
plt.close()

# Split the columns into those that never contain 0 and those that do.
# ``DataFrame.all()`` skips NaN by default, so NaN does not count as a zero.
non_zero_cols = list(compress(columns_num_peaks_zero, df_num_peaks_non_zero.all().tolist()))
# Keep the original column order so the printed list is stable between runs
# (a plain set difference would print in arbitrary order).
non_zero_lookup = set(non_zero_cols)
zero_cols = [col for col in columns_num_peaks_zero if col not in non_zero_lookup]

print(non_zero_cols, "\n")
print(zero_cols)