rapids/tests/scripts/missing_vals.py

29 lines
1.1 KiB
Python

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
participant = "p02"
# path = "/rapids/data/processed/features/all_participants/all_sensor_features.csv" # all features all participants
# path = "/rapids/data/interim/p03/empatica_accelerometer_features/empatica_accelerometer_python_cr_windows.csv"
path = f"/rapids/data/interim/{participant}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_cr_windows.csv"
# path = "/rapids/data/interim/p02/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_cr_windows.csv"
# path = "/rapids/data/interim/p02/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_cr_windows.csv"
# path = "/rapids/data/interim/p02/empatica_temperature_features/empatica_temperature_python_cr_windows.csv"
df = pd.read_csv(path)
print(df)
is_NaN = df.isnull()
row_has_NaN = is_NaN.any(axis=1)
rows_with_NaN = df[row_has_NaN]
print("All rows:", len(df.index))
print("\nCount NaN vals:", rows_with_NaN.size)
print("\nDf mean:")
print(df.mean())
sns.heatmap(df.isna(), cbar=False)
plt.savefig(f'eda_{participant}_windows_NaN.png', bbox_inches='tight')