From 7d85f75d218ef9b3af5ae077343a58e62bb5fa2c Mon Sep 17 00:00:00 2001
From: Primoz
Date: Fri, 16 Sep 2022 09:03:30 +0000
Subject: [PATCH] Changes in phone features NaN values script.

---
 Review notes (placed after the three-dash separator, so they are not part of
 the commit message and do not affect the diff below):

 * The line structure of this patch had been collapsed; it is restored here
   with one record per line. The payload is unchanged: 84 removed lines and
   285 added lines, matching the hunk headers and the diffstat.
 * NOTE(review): the original indentation was lost. The body of every
   "for col in ...:" loop is restored with four spaces - confirm against
   blobs a163e261 / 060fe3fd before applying.
 * Follow-up candidates in phone_feats.py (deliberately NOT changed here, so
   that the patch still corresponds to the commit above):
     - in every per-sensor section there is no plt.close() between the first
       plt.savefig() and the second sns.heatmap() call, so each cut_* image
       is drawn on the current figure that still holds the isna() heatmap;
     - the thirteen per-sensor sections differ only in the column substring
       and the output file stem and could be a single loop;
     - the f'...' file names contain no placeholders.

 tests/scripts/doryab_values.py |  84 ----------
 tests/scripts/phone_feats.py   | 285 +++++++++++++++++++++++++++++++++
 2 files changed, 285 insertions(+), 84 deletions(-)
 delete mode 100644 tests/scripts/doryab_values.py
 create mode 100644 tests/scripts/phone_feats.py

diff --git a/tests/scripts/doryab_values.py b/tests/scripts/doryab_values.py
deleted file mode 100644
index a163e261..00000000
--- a/tests/scripts/doryab_values.py
+++ /dev/null
@@ -1,84 +0,0 @@
-import pandas as pd
-import seaborn as sns
-import matplotlib.pyplot as plt
-
-
-path = "/rapids/data/processed/features/all_participants/all_sensor_features.csv"
-df = pd.read_csv(path)
-
-# Bluetooth
-doryab_cols_bt = [col for col in df.columns if "bluetooth_doryab" in col]
-df_bt = df[doryab_cols_bt]
-
-print(len(doryab_cols_bt))
-print(df_bt)
-
-df_bt = df_bt.dropna(axis=0, how="all")
-sns.heatmap(df_bt.isna(), xticklabels=1)
-plt.savefig(f'bluetooth_doryab_values', bbox_inches='tight')
-
-df_q = pd.DataFrame()
-for col in df_bt:
-    df_q[col] = pd.to_numeric(pd.cut(df_bt[col], bins=[-1,0,0.000000000001,1000], labels=[-1,0,1], right=False))
-
-sns.heatmap(df_q, cbar=False, xticklabels=1)
-plt.savefig(f'cut_bluetooth_doryab_values', bbox_inches='tight')
-plt.close()
-
-# Location
-doryab_cols_loc = [col for col in df.columns if "locations_doryab" in col]
-df_loc = df[doryab_cols_loc]
-
-print(len(doryab_cols_loc))
-print(df_loc)
-
-df_loc = df_loc.dropna(axis=0, how="all").reset_index(drop=True)
-print(df_loc)
-sns.heatmap(df_loc.isna())
-plt.savefig(f'locations_doryab_values', bbox_inches='tight')
-
-df_q = pd.DataFrame()
-for col in df_loc:
-    df_q[col] = pd.to_numeric(pd.cut(df_loc[col], bins=[-1,0,0.000000000001,1000], labels=[-1,0,1], right=False))
-
-sns.heatmap(df_q, cbar=False, xticklabels=1)
-plt.savefig(f'cut_location_doryab_values', bbox_inches='tight')
-plt.close()
-
-plt.plot(df_loc['phone_locations_doryab_loglocationvariance'])
-plt.savefig(f'phone_locations_doryab_loglocationvariance', bbox_inches='tight')
-plt.close()
-
-# Phone calls & messages
-calls_sms_cols = [col for col in df.columns if "phone_calls" in col or "phone_messages" in col]
-df_cs= df[calls_sms_cols]
-
-print(len(calls_sms_cols))
-print(df_cs)
-
-df_cs = df_cs.dropna(axis=0, how="all")
-sns.heatmap(df_cs.isna(), xticklabels=1)
-plt.savefig(f'calls_sms_values', bbox_inches='tight')
-
-df_q = pd.DataFrame()
-for col in df_cs:
-    df_q[col] = pd.to_numeric(pd.cut(df_cs[col], bins=[-1,0,0.000000000001,1000], labels=[-1,0,1], right=False))
-
-sns.heatmap(df_q, cbar=False, xticklabels=1)
-plt.savefig(f'cut_calls_sms_values', bbox_inches='tight')
-plt.close()
-
-
-# All features
-
-print(len(df))
-print(df)
-
-df = df.dropna(axis=0, how="all")
-df = df.dropna(axis=1, how="all")
-sns.heatmap(df.isna())
-plt.savefig(f'all_features', bbox_inches='tight')
-
-print(df.columns[df.isna().all()].tolist())
-print("All NaNs:", df.isna().sum().sum())
-print("Df shape NaNs:", df.shape)
\ No newline at end of file
diff --git a/tests/scripts/phone_feats.py b/tests/scripts/phone_feats.py
new file mode 100644
index 00000000..060fe3fd
--- /dev/null
+++ b/tests/scripts/phone_feats.py
@@ -0,0 +1,285 @@
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+
+
+path = "/rapids/data/processed/features/all_participants/all_sensor_features.csv"
+df = pd.read_csv(path)
+
+# activity_recognition
+
+cols = [col for col in df.columns if "activity_recognition" in col]
+df_x = df[cols]
+
+print(len(cols))
+print(df_x)
+
+df_x = df_x.dropna(axis=0, how="all")
+sns.heatmap(df_x.isna(), xticklabels=1)
+plt.savefig(f'activity_recognition_values', bbox_inches='tight')
+
+df_q = pd.DataFrame()
+for col in df_x:
+    df_q[col] = pd.to_numeric(pd.cut(df_x[col], bins=[-1,0,0.000000000001,1000], labels=[-1,0,1], right=False))
+
+sns.heatmap(df_q, cbar=False, xticklabels=1)
+plt.savefig(f'cut_activity_recognition_values', bbox_inches='tight')
+plt.close()
+
+# applications_foreground
+
+cols = [col for col in df.columns if "applications_foreground" in col]
+df_x = df[cols]
+
+print(len(cols))
+print(df_x)
+
+df_x = df_x.dropna(axis=0, how="all")
+sns.heatmap(df_x.isna(), xticklabels=1)
+plt.savefig(f'applications_foreground_values', bbox_inches='tight')
+
+df_q = pd.DataFrame()
+for col in df_x:
+    df_q[col] = pd.to_numeric(pd.cut(df_x[col], bins=[-1,0,0.000000000001,1000], labels=[-1,0,1], right=False))
+
+sns.heatmap(df_q, cbar=False, xticklabels=1)
+plt.savefig(f'cut_applications_foreground_values', bbox_inches='tight')
+plt.close()
+
+# battery
+
+cols = [col for col in df.columns if "phone_battery" in col]
+df_x = df[cols]
+
+print(len(cols))
+print(df_x)
+
+df_x = df_x.dropna(axis=0, how="all")
+sns.heatmap(df_x.isna(), xticklabels=1)
+plt.savefig(f'phone_battery_values', bbox_inches='tight')
+
+df_q = pd.DataFrame()
+for col in df_x:
+    df_q[col] = pd.to_numeric(pd.cut(df_x[col], bins=[-1,0,0.000000000001,1000], labels=[-1,0,1], right=False))
+
+sns.heatmap(df_q, cbar=False, xticklabels=1)
+plt.savefig(f'cut_phone_battery_values', bbox_inches='tight')
+plt.close()
+
+# bluetooth_doryab
+
+cols = [col for col in df.columns if "bluetooth_doryab" in col]
+df_x = df[cols]
+
+print(len(cols))
+print(df_x)
+
+df_x = df_x.dropna(axis=0, how="all")
+sns.heatmap(df_x.isna(), xticklabels=1)
+plt.savefig(f'bluetooth_doryab_values', bbox_inches='tight')
+
+df_q = pd.DataFrame()
+for col in df_x:
+    df_q[col] = pd.to_numeric(pd.cut(df_x[col], bins=[-1,0,0.000000000001,1000], labels=[-1,0,1], right=False))
+
+sns.heatmap(df_q, cbar=False, xticklabels=1)
+plt.savefig(f'cut_bluetooth_doryab_values', bbox_inches='tight')
+plt.close()
+
+# bluetooth_rapids
+
+cols = [col for col in df.columns if "bluetooth_rapids" in col]
+df_x = df[cols]
+
+print(len(cols))
+print(df_x)
+
+df_x = df_x.dropna(axis=0, how="all")
+sns.heatmap(df_x.isna(), xticklabels=1)
+plt.savefig(f'bluetooth_rapids_values', bbox_inches='tight')
+
+df_q = pd.DataFrame()
+for col in df_x:
+    df_q[col] = pd.to_numeric(pd.cut(df_x[col], bins=[-1,0,0.000000000001,1000], labels=[-1,0,1], right=False))
+
+sns.heatmap(df_q, cbar=False, xticklabels=1)
+plt.savefig(f'cut_bluetooth_rapids_values', bbox_inches='tight')
+plt.close()
+
+# calls
+
+cols = [col for col in df.columns if "phone_calls" in col]
+df_x = df[cols]
+
+print(len(cols))
+print(df_x)
+
+df_x = df_x.dropna(axis=0, how="all")
+sns.heatmap(df_x.isna(), xticklabels=1)
+plt.savefig(f'phone_calls_values', bbox_inches='tight')
+
+df_q = pd.DataFrame()
+for col in df_x:
+    df_q[col] = pd.to_numeric(pd.cut(df_x[col], bins=[-1,0,0.000000000001,1000], labels=[-1,0,1], right=False))
+
+sns.heatmap(df_q, cbar=False, xticklabels=1)
+plt.savefig(f'cut_phone_calls_values', bbox_inches='tight')
+plt.close()
+
+# data_yield
+
+cols = [col for col in df.columns if "data_yield" in col]
+df_x = df[cols]
+
+print(len(cols))
+print(df_x)
+
+df_x = df_x.dropna(axis=0, how="all")
+sns.heatmap(df_x.isna(), xticklabels=1)
+plt.savefig(f'data_yield_values', bbox_inches='tight')
+
+df_q = pd.DataFrame()
+for col in df_x:
+    df_q[col] = pd.to_numeric(pd.cut(df_x[col], bins=[-1,0,0.000000000001,1000], labels=[-1,0,1], right=False))
+
+sns.heatmap(df_q, cbar=False, xticklabels=1)
+plt.savefig(f'cut_data_yield_values', bbox_inches='tight')
+plt.close()
+
+# esm
+
+cols = [col for col in df.columns if "phone_esm" in col]
+df_x = df[cols]
+
+print(len(cols))
+print(df_x)
+
+df_x = df_x.dropna(axis=0, how="all")
+sns.heatmap(df_x.isna(), xticklabels=1)
+plt.savefig(f'phone_esm_values', bbox_inches='tight')
+
+df_q = pd.DataFrame()
+for col in df_x:
+    df_q[col] = pd.to_numeric(pd.cut(df_x[col], bins=[-1,0,0.000000000001,1000], labels=[-1,0,1], right=False))
+
+sns.heatmap(df_q, cbar=False, xticklabels=1)
+plt.savefig(f'cut_phone_esm_values', bbox_inches='tight')
+plt.close()
+
+# light
+
+cols = [col for col in df.columns if "phone_light" in col]
+df_x = df[cols]
+
+print(len(cols))
+print(df_x)
+
+df_x = df_x.dropna(axis=0, how="all")
+sns.heatmap(df_x.isna(), xticklabels=1)
+plt.savefig(f'phone_light_values', bbox_inches='tight')
+
+df_q = pd.DataFrame()
+for col in df_x:
+    df_q[col] = pd.to_numeric(pd.cut(df_x[col], bins=[-1,0,0.000000000001,1000], labels=[-1,0,1], right=False))
+
+sns.heatmap(df_q, cbar=False, xticklabels=1)
+plt.savefig(f'cut_phone_light_values', bbox_inches='tight')
+plt.close()
+
+# locations_doryab
+
+cols = [col for col in df.columns if "locations_doryab" in col]
+df_x = df[cols]
+
+print(len(cols))
+print(df_x)
+
+df_x = df_x.dropna(axis=0, how="all")
+sns.heatmap(df_x.isna(), xticklabels=1)
+plt.savefig(f'locations_doryab_values', bbox_inches='tight')
+
+df_q = pd.DataFrame()
+for col in df_x:
+    df_q[col] = pd.to_numeric(pd.cut(df_x[col], bins=[-1,0,0.000000000001,1000], labels=[-1,0,1], right=False))
+
+sns.heatmap(df_q, cbar=False, xticklabels=1)
+plt.savefig(f'cut_locations_doryab_values', bbox_inches='tight')
+plt.close()
+
+# locations_barnett
+
+# Not working
+
+# messages
+
+cols = [col for col in df.columns if "phone_messages" in col]
+df_x = df[cols]
+
+print(len(cols))
+print(df_x)
+
+df_x = df_x.dropna(axis=0, how="all")
+sns.heatmap(df_x.isna(), xticklabels=1)
+plt.savefig(f'phone_messages_values', bbox_inches='tight')
+
+df_q = pd.DataFrame()
+for col in df_x:
+    df_q[col] = pd.to_numeric(pd.cut(df_x[col], bins=[-1,0,0.000000000001,1000], labels=[-1,0,1], right=False))
+
+sns.heatmap(df_q, cbar=False, xticklabels=1)
+plt.savefig(f'cut_phone_messages_values', bbox_inches='tight')
+plt.close()
+
+# screen
+
+cols = [col for col in df.columns if "phone_screen" in col]
+df_x = df[cols]
+
+print(len(cols))
+print(df_x)
+
+df_x = df_x.dropna(axis=0, how="all")
+sns.heatmap(df_x.isna(), xticklabels=1)
+plt.savefig(f'phone_screen_values', bbox_inches='tight')
+
+df_q = pd.DataFrame()
+for col in df_x:
+    df_q[col] = pd.to_numeric(pd.cut(df_x[col], bins=[-1,0,0.000000000001,1000], labels=[-1,0,1], right=False))
+
+sns.heatmap(df_q, cbar=False, xticklabels=1)
+plt.savefig(f'cut_phone_screen_values', bbox_inches='tight')
+plt.close()
+
+# wifi_visible
+
+cols = [col for col in df.columns if "wifi_visible" in col]
+df_x = df[cols]
+
+print(len(cols))
+print(df_x)
+
+df_x = df_x.dropna(axis=0, how="all")
+sns.heatmap(df_x.isna(), xticklabels=1)
+plt.savefig(f'wifi_visible_values', bbox_inches='tight')
+
+df_q = pd.DataFrame()
+for col in df_x:
+    df_q[col] = pd.to_numeric(pd.cut(df_x[col], bins=[-1,0,0.000000000001,1000], labels=[-1,0,1], right=False))
+
+sns.heatmap(df_q, cbar=False, xticklabels=1)
+plt.savefig(f'cut_wifi_visible_values', bbox_inches='tight')
+plt.close()
+
+# All features
+
+print(len(df))
+print(df)
+
+# df = df.dropna(axis=0, how="all")
+# df = df.dropna(axis=1, how="all")
+sns.heatmap(df.isna())
+plt.savefig(f'all_features', bbox_inches='tight')
+
+print(df.columns[df.isna().all()].tolist())
+print("All NaNs:", df.isna().sum().sum())
+print("Df shape NaNs:", df.shape)
\ No newline at end of file