Remove data_yield from features.

ml_pipeline
Primoz 2023-02-06 11:16:53 +01:00
parent ea3f805ba7
commit afeb7b4872
1 changed files with 8 additions and 7 deletions

View File

@ -170,7 +170,7 @@ make_predictions_with_sensor_groups(model_input.copy(), groups_substrings=big_gr
# %% [markdown] # %% [markdown]
# ### Phone sensor groups # ### Phone sensor groups
# make_predictions_with_sensor_groups(model_input.copy(), groups_substrings="_", include_group=True) # make_predictions_with_sensor_groups(model_input.copy(), groups_substrings="_", include_group=True)
# phone_sensors = ["phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery", "phone_calls_", "phone_data_yield_", # phone_sensors = ["phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery", "phone_calls_",
# "phone_light_", "phone_location_", "phone_messages", "phone_screen_", "phone_speech_"] # "phone_light_", "phone_location_", "phone_messages", "phone_screen_", "phone_speech_"]
# make_predictions_with_sensor_groups(model_input.copy(), groups_substrings=phone_sensors, include_group=False) # make_predictions_with_sensor_groups(model_input.copy(), groups_substrings=phone_sensors, include_group=False)
@ -178,7 +178,7 @@ make_predictions_with_sensor_groups(model_input.copy(), groups_substrings=big_gr
# Write all the sensors (phone, empatica), separate other (demographical) cols also # Write all the sensors (phone, empatica), separate other (demographical) cols also
sensors_features_groups = ["empatica_inter_beat_", "empatica_accelerometer_", "empatica_temperature_", "empatica_electrodermal_", sensors_features_groups = ["empatica_inter_beat_", "empatica_accelerometer_", "empatica_temperature_", "empatica_electrodermal_",
"phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery_", "phone_calls_", "phone_data_yield_", "phone_light_", "phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery_", "phone_calls_", "phone_light_",
"phone_locations_", "phone_messages", "phone_screen_"] # , "phone_speech_"] "phone_locations_", "phone_messages", "phone_screen_"] # , "phone_speech_"]
# %% # %%
def find_sensor_group_features_importance(model_input, sensor_groups_strings): def find_sensor_group_features_importance(model_input, sensor_groups_strings):
@ -270,7 +270,7 @@ def plot_sequential_progress_of_feature_addition_scores(xs, y_recall, y_fscore,
# %% # %%
sensors_features_groups = ["empatica_inter_beat_", "empatica_accelerometer_", "empatica_temperature_", "empatica_electrodermal_", sensors_features_groups = ["empatica_inter_beat_", "empatica_accelerometer_", "empatica_temperature_", "empatica_electrodermal_",
"phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery_", "phone_calls_", "phone_data_yield_", "phone_light_", "phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery_", "phone_calls_", "phone_light_",
"phone_locations_", "phone_messages", "phone_screen_"] # , "phone_speech_"] "phone_locations_", "phone_messages", "phone_screen_"] # , "phone_speech_"]
# sensors_features_groups = ["phone_", "empatica_", "demo_"] # sensors_features_groups = ["phone_", "empatica_", "demo_"]
@ -293,7 +293,7 @@ best_sensor_features = [col for col in model_input if col.startswith(best_sensor
best_sensor_features_scores = find_sensor_group_features_importance(model_input, best_sensor_features) best_sensor_features_scores = find_sensor_group_features_importance(model_input, best_sensor_features)
xs, y_recall, y_fscore = sort_tuples_to_lists(best_sensor_features_scores) xs, y_recall, y_fscore, recall_std, fscore_std = sort_tuples_to_lists(best_sensor_features_scores)
# %% [markdown] # %% [markdown]
# ### Visualize best sensor's F1 and recall scores # ### Visualize best sensor's F1 and recall scores
@ -309,10 +309,11 @@ for i, sensor_group in enumerate(sensor_groups_importance_scores):
current_sensor_features = [col for col in model_input if col.startswith(sensor_group[0])] current_sensor_features = [col for col in model_input if col.startswith(sensor_group[0])]
current_sensor_features_scores = find_sensor_group_features_importance(model_input, current_sensor_features) current_sensor_features_scores = find_sensor_group_features_importance(model_input, current_sensor_features)
xs, y_recall, y_fscore = sort_tuples_to_lists(current_sensor_features_scores) xs, y_recall, y_fscore, recall_std, fscore_std = sort_tuples_to_lists(current_sensor_features_scores)
feature_sequence = feature_sequence.append(pd.DataFrame({"sensor_name":sensor_group[0], "feature_sequence": [xs], "recall": [y_recall], "f1_score": [y_fscore]})) feature_sequence = feature_sequence.append(pd.DataFrame({"sensor_name":sensor_group[0], "feature_sequence": [xs], "recall": [y_recall],
"f1_score": [y_fscore], "recall_std": [recall_std], "f1_std": [fscore_std]}))
plot_sequential_progress_of_feature_addition_scores(xs, y_recall, y_fscore, plot_sequential_progress_of_feature_addition_scores(xs, y_recall, y_fscore, recall_std, fscore_std,
title=f"Sequential addition of features for {sensor_group[0]} and its F1, and recall scores") title=f"Sequential addition of features for {sensor_group[0]} and its F1, and recall scores")
feature_sequence.to_excel("all_sensors_sequential_addition_scores.xlsx", index=False) feature_sequence.to_excel("all_sensors_sequential_addition_scores.xlsx", index=False)