"""Plot missing-value and discretised-value heatmaps for sensor feature columns.

The script loads one CSV of features, selects column groups by substring
match on the column name (Bluetooth, locations, calls/messages), and for each
group saves two images: a heatmap of where values are NaN, and a heatmap of
the values binned into -1 / 0 / 1.  Finally it saves a NaN heatmap for the
whole table and prints a few NaN statistics.
"""
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

path = "/rapids/data/processed/features/all_participants/all_sensor_features.csv"
df = pd.read_csv(path)

# Half-open bins (right=False): [-1, 0) -> -1, [0, 1e-12) -> 0, [1e-12, 1000) -> 1.
# Values outside [-1, 1000) fall in no bin and become NaN.
_CUT_BINS = [-1, 0, 0.000000000001, 1000]
_CUT_LABELS = [-1, 0, 1]


def _save_and_close(filename):
    """Save the current matplotlib figure to *filename* and close it.

    Closing matters: seaborn draws on the current axes when none is given, so
    a figure left open would be drawn over by the next heatmap and its
    colorbar would leak into the next saved image.
    """
    plt.savefig(filename, bbox_inches='tight')
    plt.close()


def _discretise(features):
    """Return a frame with every column of *features* binned to -1, 0 or 1.

    Built from a dict in one step instead of inserting columns one by one,
    which avoids pandas' fragmentation warning on wide frames.
    """
    return pd.DataFrame({
        col: pd.to_numeric(
            pd.cut(features[col], bins=_CUT_BINS, labels=_CUT_LABELS, right=False)
        )
        for col in features
    })


def _plot_missing(features, filename, **heatmap_kwargs):
    """Save a heatmap of the NaN mask of *features* to *filename*."""
    sns.heatmap(features.isna(), **heatmap_kwargs)
    _save_and_close(filename)


def _plot_discretised(features, filename):
    """Save a heatmap of the binned values of *features*; return the binned frame."""
    binned = _discretise(features)
    sns.heatmap(binned, cbar=False, xticklabels=1)
    _save_and_close(filename)
    return binned


# Bluetooth
doryab_cols_bt = [col for col in df.columns if "bluetooth_doryab" in col]
df_bt = df[doryab_cols_bt]

print(len(doryab_cols_bt))
print(df_bt)

# Rows where every selected column is NaN carry no information for the plots.
df_bt = df_bt.dropna(axis=0, how="all")
_plot_missing(df_bt, 'bluetooth_doryab_values', xticklabels=1)
df_q = _plot_discretised(df_bt, 'cut_bluetooth_doryab_values')

# Location
doryab_cols_loc = [col for col in df.columns if "locations_doryab" in col]
df_loc = df[doryab_cols_loc]

print(len(doryab_cols_loc))
print(df_loc)

# The index is reset here (unlike the other groups), so the row labels and the
# x axis of the line plot below run from 0 without gaps.
df_loc = df_loc.dropna(axis=0, how="all").reset_index(drop=True)
print(df_loc)
_plot_missing(df_loc, 'locations_doryab_values')
df_q = _plot_discretised(df_loc, 'cut_location_doryab_values')

plt.plot(df_loc['phone_locations_doryab_loglocationvariance'])
_save_and_close('phone_locations_doryab_loglocationvariance')

# Phone calls & messages
calls_sms_cols = [
    col for col in df.columns if "phone_calls" in col or "phone_messages" in col
]
df_cs = df[calls_sms_cols]

print(len(calls_sms_cols))
print(df_cs)

df_cs = df_cs.dropna(axis=0, how="all")
_plot_missing(df_cs, 'calls_sms_values', xticklabels=1)
df_q = _plot_discretised(df_cs, 'cut_calls_sms_values')

# All features
print(len(df))
print(df)

# Drop rows, then columns, that are entirely NaN.
df = df.dropna(axis=0, how="all")
df = df.dropna(axis=1, how="all")
_plot_missing(df, 'all_features')

# All-NaN columns were just dropped, so this list is expected to be empty.
print(df.columns[df.isna().all()].tolist())
print("All NaNs:", df.isna().sum().sum())
print("Df shape NaNs:", df.shape)