Changes to the NaN-values testing script.
parent
18002f59e1
commit
385e21409d
|
@ -49,3 +49,36 @@ plt.plot(df_loc['phone_locations_doryab_loglocationvariance'])
|
||||||
plt.savefig(f'phone_locations_doryab_loglocationvariance', bbox_inches='tight')
|
plt.savefig(f'phone_locations_doryab_loglocationvariance', bbox_inches='tight')
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
|
# Phone calls & messages
# Select every feature column whose name mentions phone calls or phone messages.
calls_sms_cols = [
    col for col in df.columns if "phone_calls" in col or "phone_messages" in col
]
df_cs = df[calls_sms_cols]

print(len(calls_sms_cols))
print(df_cs)

# Drop rows in which every selected column is NaN, then plot the NaN mask.
df_cs = df_cs.dropna(axis=0, how="all")
sns.heatmap(df_cs.isna(), xticklabels=1)
plt.savefig('calls_sms_values', bbox_inches='tight')
# Close the figure here; otherwise the next heatmap is drawn onto the same
# axes and the second image contains both plots overlaid.
plt.close()

# Bucket every value by sign: -1 for negative, 0 for [0, 1e-12), 1 for anything
# larger. The outer edges are infinite so that large magnitudes still land in a
# bucket instead of becoming NaN, which the heatmap could not tell apart from
# genuinely missing data. NaN inputs stay NaN.
sign_bins = [float("-inf"), 0, 0.000000000001, float("inf")]
sign_labels = [-1, 0, 1]

df_q = pd.DataFrame()
for col in df_cs:
    # right=False makes the intervals left-closed, so exact zeros get label 0.
    df_q[col] = pd.to_numeric(
        pd.cut(df_cs[col], bins=sign_bins, labels=sign_labels, right=False)
    )

sns.heatmap(df_q, cbar=False, xticklabels=1)
plt.savefig('cut_calls_sms_values', bbox_inches='tight')
plt.close()
|
||||||
|
|
||||||
|
|
||||||
|
# All features
print(len(df))
print(df)

# Remove rows, then columns, that contain no data at all before plotting
# the NaN mask of the remaining frame.
df = df.dropna(axis=0, how="all")
df = df.dropna(axis=1, how="all")
sns.heatmap(df.isna())
plt.savefig('all_features', bbox_inches='tight')
# Close the figure after saving, as the earlier plots in this script do.
plt.close()

# Sanity check: all-NaN columns were dropped just above, so this list is
# expected to be empty.
print(df.columns[df.isna().all()].tolist())
# Total number of NaN cells left, followed by the shape of the cleaned frame.
print("All NaNs:", df.isna().sum().sum())
print("Df shape NaNs:", df.shape)
|
Loading…
Reference in New Issue