"""Plot missing-data and binned-value heatmaps for sensor feature columns.

The script reads one CSV of features, then for every sensor name in
``SENSOR_PATTERNS`` selects the columns whose name contains that substring and
writes two images into the current working directory:

* ``<sensor>_values``      - heatmap of ``isna()`` for the selected columns
* ``cut_<sensor>_values``  - heatmap of the values binned into -1 / 0 / 1

Finally it writes ``all_features`` (``isna()`` of the whole table) and prints
a few summary statistics about missing values.

No file extension is given to ``savefig``; matplotlib appends its default
output format.
"""

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

FEATURES_PATH = (
    "/rapids/data/processed/features/all_participants/all_sensor_features.csv"
)

# Substrings used to select each sensor's columns; also used as the stem of
# the output file names. Order matches the order the plots are produced in.
# NOTE: "locations_barnett" was marked "Not working" in the original script
# and is intentionally not plotted.
SENSOR_PATTERNS = (
    "activity_recognition",
    "applications_foreground",
    "phone_battery",
    "bluetooth_doryab",
    "bluetooth_rapids",
    "phone_calls",
    "data_yield",
    "phone_esm",
    "phone_light",
    "locations_doryab",
    "phone_messages",
    "phone_screen",
    "wifi_visible",
)

# With right=False the intervals are [-1, 0), [0, 1e-12) and [1e-12, 1000),
# labelled -1, 0 and 1 respectively. Values outside [-1, 1000) fall in no bin
# and become NaN in the binned frame.
CUT_BINS = [-1, 0, 0.000000000001, 1000]
CUT_LABELS = [-1, 0, 1]


def _save_heatmap(data, filename, **heatmap_kwargs):
    """Draw *data* as a seaborn heatmap on a fresh figure and save it.

    A new figure is created for every image and closed afterwards, so a
    heatmap is never drawn on top of a previously saved one (seaborn draws on
    the current axes when no ``ax`` is passed).
    """
    plt.figure()
    sns.heatmap(data, **heatmap_kwargs)
    plt.savefig(filename, bbox_inches="tight")
    plt.close()


def _bin_values(frame):
    """Return a frame with every column of *frame* binned to -1 / 0 / 1.

    Each column is cut with ``CUT_BINS`` / ``CUT_LABELS`` and the resulting
    categorical is converted back to numbers so it can be drawn as a heatmap.
    """
    return pd.DataFrame(
        {
            name: pd.to_numeric(
                pd.cut(column, bins=CUT_BINS, labels=CUT_LABELS, right=False)
            )
            for name, column in frame.items()
        }
    )


def plot_sensor(df, sensor):
    """Print and plot the columns of *df* whose name contains *sensor*.

    Prints the number of matching columns and the selected sub-frame, drops
    rows in which all selected columns are NaN, then saves the missingness
    heatmap (``<sensor>_values``) and the binned heatmap
    (``cut_<sensor>_values``).

    If nothing is left to plot (no matching column, or no row survives the
    drop) a notice is printed and no image is written, because seaborn raises
    on zero-size data.
    """
    cols = [col for col in df.columns if sensor in col]
    df_x = df[cols]
    print(len(cols))
    print(df_x)

    df_x = df_x.dropna(axis=0, how="all")
    if df_x.empty:
        print(f"Skipping {sensor}: no data left to plot")
        return

    _save_heatmap(df_x.isna(), f"{sensor}_values", xticklabels=1)
    _save_heatmap(
        _bin_values(df_x), f"cut_{sensor}_values", cbar=False, xticklabels=1
    )


df = pd.read_csv(FEATURES_PATH)

for sensor_name in SENSOR_PATTERNS:
    plot_sensor(df, sensor_name)

# All features
# Rows and columns that are entirely NaN are not dropped here; the names of
# the all-NaN columns are printed below instead.
print(len(df))
print(df)
na_mask = df.isna()
_save_heatmap(na_mask, "all_features")
print(df.columns[na_mask.all()].tolist())
print("All NaNs:", na_mask.sum().sum())
print("Df shape NaNs:", df.shape)