Changes in phone features NaN values script.
parent
385e21409d
commit
7d85f75d21
|
@ -1,84 +0,0 @@
|
||||||
import pandas as pd
|
|
||||||
import seaborn as sns
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
|
|
||||||
|
|
||||||
# Merged feature table that the rest of this script inspects.
path = "/rapids/data/processed/features/all_participants/all_sensor_features.csv"
df = pd.read_csv(path)

# Bluetooth
# Keep only the columns whose name contains "bluetooth_doryab".
doryab_cols_bt = [col for col in df.columns if "bluetooth_doryab" in col]
df_bt = df[doryab_cols_bt]

print(len(doryab_cols_bt))
print(df_bt)

# Drop rows in which every selected column is NaN, then plot the NaN mask.
df_bt = df_bt.dropna(axis=0, how="all")
sns.heatmap(df_bt.isna(), xticklabels=1)
plt.savefig('bluetooth_doryab_values', bbox_inches='tight')
# Close the figure so the next heatmap starts on fresh axes; otherwise
# sns.heatmap draws into the current axes, on top of the NaN mask and
# next to its colorbar.
plt.close()

# Bin every value with right=False: [-1, 0) -> -1, [0, 1e-12) -> 0,
# [1e-12, 1000) -> 1. Values outside [-1, 1000) match no bin and become NaN.
# Building the frame in one go avoids growing it column by column.
df_q = pd.DataFrame({
    col: pd.to_numeric(
        pd.cut(df_bt[col], bins=[-1, 0, 0.000000000001, 1000], labels=[-1, 0, 1], right=False)
    )
    for col in df_bt
})

sns.heatmap(df_q, cbar=False, xticklabels=1)
plt.savefig('cut_bluetooth_doryab_values', bbox_inches='tight')
plt.close()
|
|
||||||
|
|
||||||
# Location
# Keep only the columns whose name contains "locations_doryab".
doryab_cols_loc = [col for col in df.columns if "locations_doryab" in col]
df_loc = df[doryab_cols_loc]

print(len(doryab_cols_loc))
print(df_loc)

# Drop rows in which every selected column is NaN and renumber the rest
# from 0, so the plots below use a contiguous row index.
df_loc = df_loc.dropna(axis=0, how="all").reset_index(drop=True)
print(df_loc)
sns.heatmap(df_loc.isna())
plt.savefig('locations_doryab_values', bbox_inches='tight')
# Close the figure so the next heatmap starts on fresh axes; otherwise
# sns.heatmap draws into the current axes, on top of the NaN mask and
# next to its colorbar.
plt.close()

# Bin every value with right=False: [-1, 0) -> -1, [0, 1e-12) -> 0,
# [1e-12, 1000) -> 1. Values outside [-1, 1000) match no bin and become NaN.
# Building the frame in one go avoids growing it column by column.
df_q = pd.DataFrame({
    col: pd.to_numeric(
        pd.cut(df_loc[col], bins=[-1, 0, 0.000000000001, 1000], labels=[-1, 0, 1], right=False)
    )
    for col in df_loc
})

sns.heatmap(df_q, cbar=False, xticklabels=1)
plt.savefig('cut_location_doryab_values', bbox_inches='tight')
plt.close()

# Line plot of a single feature over the remaining rows.
plt.plot(df_loc['phone_locations_doryab_loglocationvariance'])
plt.savefig('phone_locations_doryab_loglocationvariance', bbox_inches='tight')
plt.close()
|
|
||||||
|
|
||||||
# Phone calls & messages
# Keep the columns whose name contains "phone_calls" or "phone_messages".
calls_sms_cols = [col for col in df.columns if "phone_calls" in col or "phone_messages" in col]
df_cs = df[calls_sms_cols]

print(len(calls_sms_cols))
print(df_cs)

# Drop rows in which every selected column is NaN, then plot the NaN mask.
df_cs = df_cs.dropna(axis=0, how="all")
sns.heatmap(df_cs.isna(), xticklabels=1)
plt.savefig('calls_sms_values', bbox_inches='tight')
# Close the figure so the next heatmap starts on fresh axes; otherwise
# sns.heatmap draws into the current axes, on top of the NaN mask and
# next to its colorbar.
plt.close()

# Bin every value with right=False: [-1, 0) -> -1, [0, 1e-12) -> 0,
# [1e-12, 1000) -> 1. Values outside [-1, 1000) match no bin and become NaN.
# Building the frame in one go avoids growing it column by column.
df_q = pd.DataFrame({
    col: pd.to_numeric(
        pd.cut(df_cs[col], bins=[-1, 0, 0.000000000001, 1000], labels=[-1, 0, 1], right=False)
    )
    for col in df_cs
})

sns.heatmap(df_q, cbar=False, xticklabels=1)
plt.savefig('cut_calls_sms_values', bbox_inches='tight')
plt.close()
|
|
||||||
|
|
||||||
|
|
||||||
# All features
print(len(df))
print(df)

# Drop rows that are NaN in every column.
df = df.dropna(axis=0, how="all")

# Record the all-NaN columns BEFORE dropping them. Computing this list
# after dropna(axis=1, how="all") would always give an empty list.
all_nan_cols = df.columns[df.isna().all()].tolist()
df = df.dropna(axis=1, how="all")

sns.heatmap(df.isna())
plt.savefig('all_features', bbox_inches='tight')

print(all_nan_cols)
# Remaining NaN count and shape, after the all-NaN rows/columns are gone.
print("All NaNs:", df.isna().sum().sum())
print("Df shape NaNs:", df.shape)
|
|
|
@ -0,0 +1,285 @@
|
||||||
|
import pandas as pd
|
||||||
|
import seaborn as sns
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
|
||||||
|
# Merged feature table that the rest of this script inspects.
path = "/rapids/data/processed/features/all_participants/all_sensor_features.csv"
df = pd.read_csv(path)

# Bin edges and labels for pd.cut(..., right=False):
#   [-1, 0) -> -1, [0, 1e-12) -> 0, [1e-12, 1000) -> 1.
# Values outside [-1, 1000) match no bin and become NaN.
_SIGN_BINS = [-1, 0, 0.000000000001, 1000]
_SIGN_LABELS = [-1, 0, 1]

# Substrings that select each sensor's columns. The same string is reused
# in the names of the two figures saved for that sensor.
_SENSOR_KEYS = [
    "activity_recognition",
    "applications_foreground",
    "phone_battery",
    "bluetooth_doryab",
    "bluetooth_rapids",
    "phone_calls",
    "data_yield",
    "phone_esm",
    "phone_light",
    "locations_doryab",
    # locations_barnett: not working, so it is skipped.
    "phone_messages",
    "phone_screen",
    "wifi_visible",
]


def _save_sensor_heatmaps(features, key):
    """Save the NaN-mask and binned-value heatmaps for one sensor.

    Selects the columns of ``features`` whose name contains ``key``, prints
    how many there are and their contents, drops the rows in which all of
    them are NaN, and saves two figures: ``<key>_values`` (the NaN mask) and
    ``cut_<key>_values`` (every value binned to -1, 0 or 1).

    Args:
        features: DataFrame holding the feature columns of all sensors.
        key: Substring identifying the sensor's columns.
    """
    cols = [col for col in features.columns if key in col]
    df_x = features[cols]

    print(len(cols))
    print(df_x)

    df_x = df_x.dropna(axis=0, how="all")
    sns.heatmap(df_x.isna(), xticklabels=1)
    plt.savefig(f'{key}_values', bbox_inches='tight')
    # Close the figure so the next heatmap starts on fresh axes; otherwise
    # sns.heatmap draws into the current axes, on top of the NaN mask and
    # next to its colorbar.
    plt.close()

    # Build the binned frame in one go instead of growing it per column.
    df_q = pd.DataFrame({
        col: pd.to_numeric(
            pd.cut(df_x[col], bins=_SIGN_BINS, labels=_SIGN_LABELS, right=False)
        )
        for col in df_x
    })

    sns.heatmap(df_q, cbar=False, xticklabels=1)
    plt.savefig(f'cut_{key}_values', bbox_inches='tight')
    plt.close()


for sensor_key in _SENSOR_KEYS:
    _save_sensor_heatmaps(df, sensor_key)
|
||||||
|
|
||||||
|
# All features
print(len(df))
print(df)

# The all-NaN row/column drops are left disabled.
# NOTE(review): presumably so that fully empty columns still show up in the
# heatmap and in the list printed below - confirm.
# df = df.dropna(axis=0, how="all")
# df = df.dropna(axis=1, how="all")

# Compute the NaN mask once and reuse it for the plot and the summaries.
nan_mask = df.isna()
sns.heatmap(nan_mask)
plt.savefig('all_features', bbox_inches='tight')

# Names of the columns that contain nothing but NaN.
fully_missing = nan_mask.all()
print(df.columns[fully_missing].tolist())
print("All NaNs:", nan_mask.sum().sum())
print("Df shape NaNs:", df.shape)
|
Loading…
Reference in New Issue