Implement an algorithm for sequentially adding the most important features.
parent
85e572fca0
commit
07ef72dec5
|
@ -72,7 +72,7 @@ demo_features = ['demo_age', 'demo_limesurvey_demand', 'demo_limesurvey_control'
|
||||||
# %%
|
# %%
|
||||||
# Get phone and non-phone columns
|
# Get phone and non-phone columns
|
||||||
|
|
||||||
def make_predictions_with_sensor_groups(df, groups_substrings, include_group=True):
|
def make_predictions_with_sensor_groups(df, groups_substrings, include_group=True, with_cols=[]):
|
||||||
"""
|
"""
|
||||||
This function makes predictions with sensor groups.
|
This function makes predictions with sensor groups.
|
||||||
It takes in a dataframe (df), a list of group substrings (groups_substrings)
|
It takes in a dataframe (df), a list of group substrings (groups_substrings)
|
||||||
|
@ -87,6 +87,10 @@ def make_predictions_with_sensor_groups(df, groups_substrings, include_group=Tru
|
||||||
is set to True or False.
|
is set to True or False.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
best_sensor = None
|
||||||
|
best_recall_score, best_f1_sore = None, None
|
||||||
|
|
||||||
for fgroup_substr in groups_substrings:
|
for fgroup_substr in groups_substrings:
|
||||||
if fgroup_substr is None:
|
if fgroup_substr is None:
|
||||||
feature_group_cols = list(df.columns)
|
feature_group_cols = list(df.columns)
|
||||||
|
@ -99,7 +103,10 @@ def make_predictions_with_sensor_groups(df, groups_substrings, include_group=Tru
|
||||||
feature_group_cols = [col for col in df.columns if fgroup_substr not in col and col not in ['pid', 'target']]
|
feature_group_cols = [col for col in df.columns if fgroup_substr not in col and col not in ['pid', 'target']]
|
||||||
|
|
||||||
|
|
||||||
X, y = df.drop(columns=['target', 'pid'])[feature_group_cols], df['target']
|
X, y = df.drop(columns=['target', 'pid'])[feature_group_cols+with_cols], df['target']
|
||||||
|
|
||||||
|
print(fgroup_substr, X.shape)
|
||||||
|
|
||||||
imputer = SimpleImputer(missing_values=np.nan, strategy='median')
|
imputer = SimpleImputer(missing_values=np.nan, strategy='median')
|
||||||
X = imputer.fit_transform(X)
|
X = imputer.fit_transform(X)
|
||||||
|
|
||||||
|
@ -115,39 +122,78 @@ def make_predictions_with_sensor_groups(df, groups_substrings, include_group=Tru
|
||||||
else:
|
else:
|
||||||
print("\nPrediction without", fgroup_substr)
|
print("\nPrediction without", fgroup_substr)
|
||||||
|
|
||||||
|
|
||||||
|
acc = metrics.accuracy_score(y_test, y_pred)
|
||||||
|
prec = metrics.precision_score(y_test, y_pred)
|
||||||
|
rec = metrics.recall_score(y_test, y_pred)
|
||||||
|
f1 = metrics.f1_score(y_test, y_pred)
|
||||||
|
|
||||||
print("************************************************")
|
print("************************************************")
|
||||||
print("Accuracy", metrics.accuracy_score(y_test, y_pred))
|
print("Accuracy", acc)
|
||||||
print("Precision", metrics.precision_score(y_test, y_pred))
|
print("Precision", prec)
|
||||||
print("Recall", metrics.recall_score(y_test, y_pred))
|
print("Recall", rec)
|
||||||
print("F1", metrics.f1_score(y_test, y_pred), "\n")
|
print("F1", f1, "\n")
|
||||||
|
|
||||||
|
if (not best_recall_score and not best_f1_sore) or (rec > best_recall_score):
|
||||||
|
best_sensor = fgroup_substr
|
||||||
|
best_recall_score, best_f1_sore = rec, f1
|
||||||
|
|
||||||
|
return best_sensor, best_recall_score, best_f1_sore
|
||||||
|
|
||||||
# %% [markdown]
|
# %% [markdown]
|
||||||
# ### Senzor big feature groups (phone, empatica, demografical)
|
# ### Sensor big feature groups (phone, Empatica, demographic)
|
||||||
groups_substr = [None, "phone_", "empatica_", "demo_"]
|
big_groups_substr = ["phone_", "empatica_", "demo_"]
|
||||||
make_predictions_with_sensor_groups(model_input.copy(), groups_substrings=groups_substr, include_group=False)
|
make_predictions_with_sensor_groups(model_input.copy(), groups_substrings=big_groups_substr, include_group=False)
|
||||||
|
|
||||||
# %% [markdown]
|
# %% [markdown]
|
||||||
# ### Empatica sensor groups
|
# ### Empatica sensor groups
|
||||||
make_predictions_with_sensor_groups(model_input.copy(), groups_substrings="_", include_group=True)
|
# make_predictions_with_sensor_groups(model_input.copy(), groups_substrings="_", include_group=True)
|
||||||
e4_sensors = ["empatica_inter_beat_", "empatica_accelerometer_", "empatica_temperature_", "empatica_electrodermal_"]
|
# e4_sensors = ["empatica_inter_beat_", "empatica_accelerometer_", "empatica_temperature_", "empatica_electrodermal_"]
|
||||||
make_predictions_with_sensor_groups(model_input.copy(), groups_substrings=e4_sensors, include_group=False)
|
# make_predictions_with_sensor_groups(model_input.copy(), groups_substrings=e4_sensors, include_group=False)
|
||||||
|
|
||||||
# %% [markdown]
|
# %% [markdown]
|
||||||
# ### Phone sensor groups
|
# ### Phone sensor groups
|
||||||
make_predictions_with_sensor_groups(model_input.copy(), groups_substrings="_", include_group=True)
|
# make_predictions_with_sensor_groups(model_input.copy(), groups_substrings="_", include_group=True)
|
||||||
phone_sensors = ["phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery", "phone_calls_", "phone_data_yield_",
|
# phone_sensors = ["phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery", "phone_calls_", "phone_data_yield_",
|
||||||
"phone_light_", "phone_location_", "phone_messages", "phone_screen_", "phone_speech_"]
|
# "phone_light_", "phone_location_", "phone_messages", "phone_screen_", "phone_speech_"]
|
||||||
make_predictions_with_sensor_groups(model_input.copy(), groups_substrings=phone_sensors, include_group=False)
|
# make_predictions_with_sensor_groups(model_input.copy(), groups_substrings=phone_sensors, include_group=False)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
# Write out all the sensors (phone, Empatica) and also separate the other (demographic) columns
|
# Write out all the sensors (phone, Empatica) and also separate the other (demographic) columns
|
||||||
|
|
||||||
sensors_features_groups = ["_", "phone_", "empatica_", "empatica_inter_beat_", "empatica_accelerometer_", "empatica_temperature_", "empatica_electrodermal_",
|
sensors_features_groups = ["empatica_inter_beat_", "empatica_accelerometer_", "empatica_temperature_", "empatica_electrodermal_",
|
||||||
"phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery", "phone_calls_", "phone_data_yield_", "phone_light_"
|
"phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery_", "phone_calls_", "phone_data_yield_", "phone_light_",
|
||||||
"phone_location_", "phone_messages", "phone_screen_", "phone_speech_"]
|
"phone_locations_", "phone_messages", "phone_screen_"] # , "phone_speech_"]
|
||||||
|
# %%
|
||||||
|
def find_sensor_group_features_importance(model_input, sensor_groups_strings):
|
||||||
|
sensor_importance_scores = []
|
||||||
|
model_input = model_input.copy()
|
||||||
|
sensor_groups_strings = sensor_groups_strings.copy()
|
||||||
|
groups_len = len(sensor_groups_strings)
|
||||||
|
for i in range(groups_len):
|
||||||
|
important_cols = [col[0] for col in sensor_importance_scores]
|
||||||
|
with_cols = [col for col in model_input.columns if any(col.startswith(y) for y in important_cols)]
|
||||||
|
|
||||||
|
|
||||||
|
best_sensor, best_recall_score, best_f1_sore = \
|
||||||
|
make_predictions_with_sensor_groups(model_input,
|
||||||
|
groups_substrings=sensor_groups_strings, include_group=True,
|
||||||
|
with_cols=with_cols)
|
||||||
|
sensor_importance_scores.append((best_sensor, best_recall_score, best_f1_sore))
|
||||||
|
print(f"\nAdded sensor: {best_sensor}\n")
|
||||||
|
sensor_groups_strings.remove(best_sensor)
|
||||||
|
|
||||||
print([col for col in model_input.columns if "phone_" not in col or "empatica_" not in col])
|
return sensor_importance_scores
|
||||||
|
|
||||||
|
# sensor_importance_scores = find_sensor_group_features_importance(model_input, big_groups_substr)
|
||||||
|
sensor_groups_importance_scores = find_sensor_group_features_importance(model_input, sensors_features_groups)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
|
best_sensor_group = sensor_groups_importance_scores[0][0]
|
||||||
|
best_sensor_features = [col for col in model_input if col.startswith(best_sensor_group)]
|
||||||
|
|
||||||
|
best_sensor_group_importance_scores = find_sensor_group_features_importance(model_input, best_sensor_features)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
|
||||||
|
# TODO: visualization of sensor_group and best_sensor_group importance (Recall/f1-score chart)
|
Loading…
Reference in New Issue