From 385e21409d1d3978588fea56cc26a8d241e3af35 Mon Sep 17 00:00:00 2001
From: Primoz
Date: Thu, 15 Sep 2022 14:16:58 +0000
Subject: [PATCH] Changes in NaN values testing script.

---
Review note: the added lines below were amended during review (figures are
closed after every savefig, explanatory comments, trailing newline), so the
post-image blob id on the "index" line no longer matches the result.

 tests/scripts/doryab_values.py | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/tests/scripts/doryab_values.py b/tests/scripts/doryab_values.py
index d51ceaa0..a163e261 100644
--- a/tests/scripts/doryab_values.py
+++ b/tests/scripts/doryab_values.py
@@ -49,3 +49,44 @@ plt.plot(df_loc['phone_locations_doryab_loglocationvariance'])
 plt.savefig(f'phone_locations_doryab_loglocationvariance', bbox_inches='tight')
 plt.close()
 
+# Phone calls & messages
+calls_sms_cols = [col for col in df.columns if "phone_calls" in col or "phone_messages" in col]
+df_cs = df[calls_sms_cols]
+
+print(len(calls_sms_cols))
+print(df_cs)
+
+# Drop rows without any calls/messages value, then map the remaining NaNs.
+df_cs = df_cs.dropna(axis=0, how="all")
+sns.heatmap(df_cs.isna(), xticklabels=1)
+plt.savefig(f'calls_sms_values', bbox_inches='tight')
+# Close the figure so the next heatmap is not drawn over this one.
+plt.close()
+
+# Bucket every value: [-1, 0) -> -1, [0, 1e-12) -> 0 (effectively zero),
+# [1e-12, 1000) -> 1; NaNs and values outside the bins stay NaN.
+df_q = pd.DataFrame()
+for col in df_cs:
+    df_q[col] = pd.to_numeric(pd.cut(df_cs[col], bins=[-1,0,0.000000000001,1000], labels=[-1,0,1], right=False))
+
+sns.heatmap(df_q, cbar=False, xticklabels=1)
+plt.savefig(f'cut_calls_sms_values', bbox_inches='tight')
+plt.close()
+
+
+# All features
+
+print(len(df))
+print(df)
+
+# Drop rows and columns that contain only NaNs before plotting.
+df = df.dropna(axis=0, how="all")
+df = df.dropna(axis=1, how="all")
+sns.heatmap(df.isna())
+plt.savefig(f'all_features', bbox_inches='tight')
+plt.close()
+
+# All-NaN columns were dropped above, so this list is expected to be empty.
+print(df.columns[df.isna().all()].tolist())
+print("All NaNs:", df.isna().sum().sum())
+print("Df shape NaNs:", df.shape)