# rapids/tests/scripts/doryab_values.py
#
# 84 lines
# 2.4 KiB
# Python

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Load the feature table once; every section below selects columns from `df`.
# NOTE(review): the path is absolute and hard-coded — confirm it matches the
# environment this script is run in.
path = "/rapids/data/processed/features/all_participants/all_sensor_features.csv"
df = pd.read_csv(path)
# Bluetooth
# Select every column whose name contains "bluetooth_doryab", print the
# selection, then save two figures: a missing-value map and a bucketed-value map.
doryab_cols_bt = [col for col in df.columns if "bluetooth_doryab" in col]
df_bt = df[doryab_cols_bt]
print(len(doryab_cols_bt))
print(df_bt)

# Drop rows that have no value in any of the selected columns.
df_bt = df_bt.dropna(axis=0, how="all")

sns.heatmap(df_bt.isna(), xticklabels=1)
plt.savefig('bluetooth_doryab_values', bbox_inches='tight')
# Close the figure here: seaborn draws on the current axes, so without this
# the next heatmap would be painted over this one and keep its colorbar.
plt.close()

# Bucket every value with half-open bins (right=False):
#   [-1, 0) -> -1,   [0, 1e-12) -> 0,   [1e-12, 1000) -> 1
# NaN and anything outside [-1, 1000) stays NaN.
# Building the frame in one go avoids repeated single-column inserts.
df_q = pd.DataFrame({
    col: pd.to_numeric(
        pd.cut(df_bt[col], bins=[-1, 0, 0.000000000001, 1000], labels=[-1, 0, 1], right=False)
    )
    for col in df_bt
})
sns.heatmap(df_q, cbar=False, xticklabels=1)
plt.savefig('cut_bluetooth_doryab_values', bbox_inches='tight')
plt.close()
# Location
# Select every column whose name contains "locations_doryab", print the
# selection, then save three figures: a missing-value map, a bucketed-value
# map, and a line plot of the log location variance column.
doryab_cols_loc = [col for col in df.columns if "locations_doryab" in col]
df_loc = df[doryab_cols_loc]
print(len(doryab_cols_loc))
print(df_loc)

# Drop rows with no value in any selected column and renumber the remaining
# rows from 0 so the plots use a contiguous index.
df_loc = df_loc.dropna(axis=0, how="all").reset_index(drop=True)
print(df_loc)

sns.heatmap(df_loc.isna())
plt.savefig('locations_doryab_values', bbox_inches='tight')
# Close the figure here: seaborn draws on the current axes, so without this
# the next heatmap would be painted over this one and keep its colorbar.
plt.close()

# Bucket every value with half-open bins (right=False):
#   [-1, 0) -> -1,   [0, 1e-12) -> 0,   [1e-12, 1000) -> 1
# NaN and anything outside [-1, 1000) stays NaN.
# Building the frame in one go avoids repeated single-column inserts.
df_q = pd.DataFrame({
    col: pd.to_numeric(
        pd.cut(df_loc[col], bins=[-1, 0, 0.000000000001, 1000], labels=[-1, 0, 1], right=False)
    )
    for col in df_loc
})
sns.heatmap(df_q, cbar=False, xticklabels=1)
plt.savefig('cut_location_doryab_values', bbox_inches='tight')
plt.close()

# Raw values of a single feature, plotted against the renumbered row index.
plt.plot(df_loc['phone_locations_doryab_loglocationvariance'])
plt.savefig('phone_locations_doryab_loglocationvariance', bbox_inches='tight')
plt.close()
# Phone calls & messages
# Select every column whose name contains "phone_calls" or "phone_messages",
# print the selection, then save two figures: a missing-value map and a
# bucketed-value map.
calls_sms_cols = [col for col in df.columns if "phone_calls" in col or "phone_messages" in col]
df_cs = df[calls_sms_cols]
print(len(calls_sms_cols))
print(df_cs)

# Drop rows that have no value in any of the selected columns.
df_cs = df_cs.dropna(axis=0, how="all")

sns.heatmap(df_cs.isna(), xticklabels=1)
plt.savefig('calls_sms_values', bbox_inches='tight')
# Close the figure here: seaborn draws on the current axes, so without this
# the next heatmap would be painted over this one and keep its colorbar.
plt.close()

# Bucket every value with half-open bins (right=False):
#   [-1, 0) -> -1,   [0, 1e-12) -> 0,   [1e-12, 1000) -> 1
# NaN and anything outside [-1, 1000) stays NaN.
# NOTE(review): values >= 1000 are therefore shown as missing — confirm that
# no column in this selection can legitimately reach that range.
df_q = pd.DataFrame({
    col: pd.to_numeric(
        pd.cut(df_cs[col], bins=[-1, 0, 0.000000000001, 1000], labels=[-1, 0, 1], right=False)
    )
    for col in df_cs
})
sns.heatmap(df_q, cbar=False, xticklabels=1)
plt.savefig('cut_calls_sms_values', bbox_inches='tight')
plt.close()
# All features
# Report the size of the full table, strip rows and then columns that are
# entirely NaN, and summarise what is still missing.
print(len(df))
print(df)

df = df.dropna(axis=0, how="all").dropna(axis=1, how="all")

# One boolean mask feeds the heatmap and both summaries below.
nan_mask = df.isna()
sns.heatmap(nan_mask)
plt.savefig('all_features', bbox_inches='tight')

# Names of columns that are still entirely NaN after the clean-up above.
all_nan_columns = df.columns[nan_mask.all()].tolist()
print(all_nan_columns)

total_nans = nan_mask.sum().sum()
print("All NaNs:", total_nans)
print("Df shape NaNs:", df.shape)