Implement algorithm for sequential adding of the most important features.
parent
85e572fca0
commit
07ef72dec5
|
@ -72,7 +72,7 @@ demo_features = ['demo_age', 'demo_limesurvey_demand', 'demo_limesurvey_control'
|
|||
# %%
|
||||
# Get phone and non-phone columns
|
||||
|
||||
def make_predictions_with_sensor_groups(df, groups_substrings, include_group=True):
|
||||
def make_predictions_with_sensor_groups(df, groups_substrings, include_group=True, with_cols=[]):
|
||||
"""
|
||||
This function makes predictions with sensor groups.
|
||||
It takes in a dataframe (df), a list of group substrings (groups_substrings)
|
||||
|
@ -87,6 +87,10 @@ def make_predictions_with_sensor_groups(df, groups_substrings, include_group=Tru
|
|||
is set to True or False.
|
||||
|
||||
"""
|
||||
|
||||
best_sensor = None
|
||||
best_recall_score, best_f1_sore = None, None
|
||||
|
||||
for fgroup_substr in groups_substrings:
|
||||
if fgroup_substr is None:
|
||||
feature_group_cols = list(df.columns)
|
||||
|
@ -99,7 +103,10 @@ def make_predictions_with_sensor_groups(df, groups_substrings, include_group=Tru
|
|||
feature_group_cols = [col for col in df.columns if fgroup_substr not in col and col not in ['pid', 'target']]
|
||||
|
||||
|
||||
X, y = df.drop(columns=['target', 'pid'])[feature_group_cols], df['target']
|
||||
X, y = df.drop(columns=['target', 'pid'])[feature_group_cols+with_cols], df['target']
|
||||
|
||||
print(fgroup_substr, X.shape)
|
||||
|
||||
imputer = SimpleImputer(missing_values=np.nan, strategy='median')
|
||||
X = imputer.fit_transform(X)
|
||||
|
||||
|
@ -115,39 +122,78 @@ def make_predictions_with_sensor_groups(df, groups_substrings, include_group=Tru
|
|||
else:
|
||||
print("\nPrediction without", fgroup_substr)
|
||||
|
||||
|
||||
acc = metrics.accuracy_score(y_test, y_pred)
|
||||
prec = metrics.precision_score(y_test, y_pred)
|
||||
rec = metrics.recall_score(y_test, y_pred)
|
||||
f1 = metrics.f1_score(y_test, y_pred)
|
||||
|
||||
print("************************************************")
|
||||
print("Accuracy", metrics.accuracy_score(y_test, y_pred))
|
||||
print("Precision", metrics.precision_score(y_test, y_pred))
|
||||
print("Recall", metrics.recall_score(y_test, y_pred))
|
||||
print("F1", metrics.f1_score(y_test, y_pred), "\n")
|
||||
print("Accuracy", acc)
|
||||
print("Precision", prec)
|
||||
print("Recall", rec)
|
||||
print("F1", f1, "\n")
|
||||
|
||||
if (not best_recall_score and not best_f1_sore) or (rec > best_recall_score):
|
||||
best_sensor = fgroup_substr
|
||||
best_recall_score, best_f1_sore = rec, f1
|
||||
|
||||
return best_sensor, best_recall_score, best_f1_sore
|
||||
|
||||
# %% [markdown]
|
||||
# ### Senzor big feature groups (phone, empatica, demografical)
|
||||
groups_substr = [None, "phone_", "empatica_", "demo_"]
|
||||
make_predictions_with_sensor_groups(model_input.copy(), groups_substrings=groups_substr, include_group=False)
|
||||
# ### Sensor big feature groups (phone, empatica, demographic)
|
||||
big_groups_substr = ["phone_", "empatica_", "demo_"]
|
||||
make_predictions_with_sensor_groups(model_input.copy(), groups_substrings=big_groups_substr, include_group=False)
|
||||
|
||||
# %% [markdown]
|
||||
# ### Empatica sensor groups
|
||||
make_predictions_with_sensor_groups(model_input.copy(), groups_substrings="_", include_group=True)
|
||||
e4_sensors = ["empatica_inter_beat_", "empatica_accelerometer_", "empatica_temperature_", "empatica_electrodermal_"]
|
||||
make_predictions_with_sensor_groups(model_input.copy(), groups_substrings=e4_sensors, include_group=False)
|
||||
# make_predictions_with_sensor_groups(model_input.copy(), groups_substrings="_", include_group=True)
|
||||
# e4_sensors = ["empatica_inter_beat_", "empatica_accelerometer_", "empatica_temperature_", "empatica_electrodermal_"]
|
||||
# make_predictions_with_sensor_groups(model_input.copy(), groups_substrings=e4_sensors, include_group=False)
|
||||
|
||||
# %% [markdown]
|
||||
# ### Phone sensor groups
|
||||
make_predictions_with_sensor_groups(model_input.copy(), groups_substrings="_", include_group=True)
|
||||
phone_sensors = ["phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery", "phone_calls_", "phone_data_yield_",
|
||||
"phone_light_", "phone_location_", "phone_messages", "phone_screen_", "phone_speech_"]
|
||||
make_predictions_with_sensor_groups(model_input.copy(), groups_substrings=phone_sensors, include_group=False)
|
||||
# make_predictions_with_sensor_groups(model_input.copy(), groups_substrings="_", include_group=True)
|
||||
# phone_sensors = ["phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery", "phone_calls_", "phone_data_yield_",
|
||||
# "phone_light_", "phone_location_", "phone_messages", "phone_screen_", "phone_speech_"]
|
||||
# make_predictions_with_sensor_groups(model_input.copy(), groups_substrings=phone_sensors, include_group=False)
|
||||
|
||||
# %%
|
||||
# Write all the sensors (phone, empatica); also separate the other (demographical) cols
|
||||
|
||||
sensors_features_groups = ["_", "phone_", "empatica_", "empatica_inter_beat_", "empatica_accelerometer_", "empatica_temperature_", "empatica_electrodermal_",
|
||||
"phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery", "phone_calls_", "phone_data_yield_", "phone_light_"
|
||||
"phone_location_", "phone_messages", "phone_screen_", "phone_speech_"]
|
||||
sensors_features_groups = ["empatica_inter_beat_", "empatica_accelerometer_", "empatica_temperature_", "empatica_electrodermal_",
|
||||
"phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery_", "phone_calls_", "phone_data_yield_", "phone_light_",
|
||||
"phone_locations_", "phone_messages", "phone_screen_"] # , "phone_speech_"]
|
||||
# %%
|
||||
def find_sensor_group_features_importance(model_input, sensor_groups_strings):
    """Rank sensor groups by adding them to the model one at a time.

    In every round each not-yet-selected group is evaluated together with the
    columns of all groups selected in earlier rounds (passed as ``with_cols``).
    The group reported as best by ``make_predictions_with_sensor_groups`` is
    recorded and removed from the candidates, and the process repeats until no
    candidates are left.

    Args:
        model_input: DataFrame with string column names; it is copied and the
            caller's object is left untouched.
        sensor_groups_strings: iterable of column-name prefixes (or full
            column names) identifying the candidate groups. It is copied, so
            the caller's collection is not modified.

    Returns:
        A list of ``(best_sensor, best_recall_score, best_f1_score)`` tuples
        in the order in which the groups were selected.

    Raises:
        ValueError: if the group returned by
            ``make_predictions_with_sensor_groups`` is not among the remaining
            candidates (raised by ``list.remove``).
    """
    model_input = model_input.copy()
    remaining_groups = list(sensor_groups_strings)
    sensor_importance_scores = []

    # Exactly one group is removed per round, so this runs once per candidate.
    while remaining_groups:
        # Columns of every group that has already been selected; an empty
        # tuple makes ``startswith`` return False, i.e. no extra columns in
        # the first round.
        selected_prefixes = tuple(score[0] for score in sensor_importance_scores)
        with_cols = [
            col for col in model_input.columns
            if col.startswith(selected_prefixes)
        ]

        best_sensor, best_recall_score, best_f1_score = \
            make_predictions_with_sensor_groups(
                model_input,
                groups_substrings=remaining_groups,
                include_group=True,
                with_cols=with_cols,
            )
        sensor_importance_scores.append(
            (best_sensor, best_recall_score, best_f1_score)
        )
        print(f"\nAdded sensor: {best_sensor}\n")
        remaining_groups.remove(best_sensor)

    # Show the columns that belong to neither the phone nor the empatica
    # sensors. Both conditions must hold: with ``or`` practically every column
    # passed the filter, because a name rarely contains both substrings.
    print([
        col for col in model_input.columns
        if "phone_" not in col and "empatica_" not in col
    ])
    return sensor_importance_scores
|
||||
|
||||
# sensor_importance_scores = find_sensor_group_features_importance(model_input, big_groups_substr)
|
||||
sensor_groups_importance_scores = find_sensor_group_features_importance(model_input, sensors_features_groups)
|
||||
|
||||
# %%
|
||||
best_sensor_group = sensor_groups_importance_scores[0][0]
|
||||
best_sensor_features = [col for col in model_input if col.startswith(best_sensor_group)]
|
||||
|
||||
best_sensor_group_importance_scores = find_sensor_group_features_importance(model_input, best_sensor_features)
|
||||
|
||||
# %%
|
||||
|
||||
# TODO: visualization of sensor_group and best_sensor_group importance (Recall/f1-score chart)
|
Loading…
Reference in New Issue