# rapids/tests/scripts/zero_vals.py
#
# 51 lines
# 2.3 KiB
# Python

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from itertools import compress
# Input feature table. Exactly one `path` assignment is active; the commented
# alternatives point at other participants / sensors and can be swapped in.
# path = "/rapids/data/processed/features/all_participants/all_sensor_features.csv" # all features all participants
# path = "/rapids/data/interim/p03/empatica_accelerometer_features/empatica_accelerometer_python_cr_windows.csv"
path = "/rapids/data/interim/p01/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_cr_windows.csv"
# path = "/rapids/data/interim/p02/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_cr_windows.csv"
# path = "/rapids/data/interim/p02/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_cr_windows.csv"
# path = "/rapids/data/interim/p02/empatica_temperature_features/empatica_temperature_python_cr_windows.csv"

# Do not truncate the column list when a frame is printed.
pd.set_option('display.max_columns', None)

df = pd.read_csv(path)

# Split the rows on whether the numPeaks feature is exactly 0.
no_peaks_mask = df["empatica_electrodermal_activity_cr_numPeaks"] == 0
df_num_peaks_zero = df[no_peaks_mask]
df_num_peaks_non_zero_t = df[~no_peaks_mask]

# Names of the columns that hold at least one NaN among the zero-peak rows.
nan_in_zero_peak_rows = df_num_peaks_zero.isna().any()
columns_num_peaks_zero = nan_in_zero_peak_rows[nan_in_zero_peak_rows].index.tolist()

# The same columns, restricted to the rows whose numPeaks is not 0.
df_num_peaks_non_zero = df_num_peaks_non_zero_t.loc[:, columns_num_peaks_zero]
# Collapse every value of the selected columns into a sign category so the
# heatmap shows where zeros sit:
#   -1 for [-1, 0), 0 for [0, 1e-12), 1 for [1e-12, 1000).
# `right=False` closes each bin on the left, so an exact 0 lands in the
# middle category. NaN inputs stay NaN.
# NOTE(review): values below -1 or at/above 1000 fall outside every bin and
# also become NaN - confirm this range is wide enough for these features.
sign_bins = [-1, 0, 0.000000000001, 1000]
sign_labels = [-1, 0, 1]

# Build all columns first and construct the frame in one call rather than
# inserting into an empty DataFrame column by column.
df_q = pd.DataFrame({
    col: pd.to_numeric(
        pd.cut(
            df_num_peaks_non_zero[col],
            bins=sign_bins,
            labels=sign_labels,
            right=False,
        )
    )
    for col in df_num_peaks_non_zero
})

sns.heatmap(df_q)
plt.savefig('eda_windows_p01_window_non_zero.png', bbox_inches='tight')
# Close the figure so the next plot starts from a clean canvas.
plt.close()
# Split the selected columns by whether they ever hold an exact 0 in the rows
# that have peaks. DataFrame.all() skips NaN by default, so a column counts as
# "non-zero" when none of its present values is 0.
has_no_zero = df_num_peaks_non_zero.all().tolist()
non_zero_cols = [
    col for col, keep in zip(columns_num_peaks_zero, has_no_zero) if keep
]

# Preserve the original column order. A plain set difference would list the
# names in an arbitrary order that changes from run to run.
non_zero_lookup = set(non_zero_cols)
zero_cols = [col for col in columns_num_peaks_zero if col not in non_zero_lookup]

print(non_zero_cols, "\n")
print(zero_cols)
# Rows with peaks whose avgPeakAmplitudeChangeBefore feature is not 0.
# NOTE(review): the original comment and the output file name both say
# "maxPeakAmplitudeChangeBefore" while the filter reads the "avg..." column -
# confirm which feature is intended.
num_peaks_col = 'empatica_electrodermal_activity_cr_numPeaks'
mpacb = df_num_peaks_non_zero_t[
    (df_num_peaks_non_zero_t['empatica_electrodermal_activity_cr_avgPeakAmplitudeChangeBefore'] != 0)
    # Redundant with how df_num_peaks_non_zero_t is built, but kept so this
    # selection stays correct on its own.
    & (df_num_peaks_non_zero_t[num_peaks_col] != 0)
]
print(mpacb[num_peaks_col])

# sns.heatmap requires 2-D data; a single-column Series is 1-D and is
# rejected with a shape-mismatch error. Selecting with a list yields a
# one-column DataFrame instead.
sns.heatmap(mpacb[[num_peaks_col]])
plt.savefig('maxPeakAmplitudeChangeBefore.png', bbox_inches='tight')
# Release the figure, matching the handling of the first plot.
plt.close()