# rapids/tests/scripts/missing_vals.py
# 40 lines, 1.5 KiB, Python

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Missing-value report for one participant's windowed Empatica feature files.
# For every sensor in all_sensors the script prints the table, the number of
# rows / rows containing NaN / missing cells, the column means, and saves a
# heatmap of the missing cells to "<sensor>_<participant>_windows_NaN.png"
# (relative path, i.e. resolved against the current working directory).
participant = "p01"
all_sensors = ["eda", "ibi", "temp", "acc"]

# Short sensor name -> sensor name as it appears in the interim feature paths.
# "bvp" is mapped here although it is not listed in all_sensors above.
sensor_path_names = {
    "eda": "electrodermal_activity",
    "bvp": "blood_volume_pulse",
    "ibi": "inter_beat_interval",
    "acc": "accelerometer",
    "temp": "temperature",
}

for sensor in all_sensors:
    if sensor in sensor_path_names:
        sensor_name = sensor_path_names[sensor]
        path = f"/rapids/data/interim/{participant}/empatica_{sensor_name}_features/empatica_{sensor_name}_python_cr_windows.csv"
    else:
        # Any unknown sensor name falls back to the combined feature file.
        path = "/rapids/data/processed/features/all_participants/all_sensor_features.csv"  # all features all participants

    df = pd.read_csv(path)
    print(df)

    is_NaN = df.isnull()
    row_has_NaN = is_NaN.any(axis=1)
    rows_with_NaN = df[row_has_NaN]

    print("All rows:", len(df.index))
    print("\nRows with NaN:", len(rows_with_NaN.index))
    # Count the missing cells themselves. rows_with_NaN.size would instead be
    # (rows containing a NaN) * (number of columns), which overstates the count.
    print("\nCount NaN vals:", int(is_NaN.sum().sum()))

    print("\nDf mean:")
    # numeric_only=True: non-numeric columns cannot be averaged and make
    # DataFrame.mean() raise on recent pandas versions.
    print(df.mean(numeric_only=True))

    # Draw every sensor on its own figure and release it afterwards; without an
    # explicit ax, seaborn draws on the current pyplot axes, so the heatmaps of
    # all sensors would pile up on one never-closed figure.
    fig, ax = plt.subplots()
    sns.heatmap(is_NaN, cbar=False, ax=ax)
    fig.savefig(f'{sensor}_{participant}_windows_NaN.png', bbox_inches='tight')
    plt.close(fig)