diff --git a/exploration/expl_features_groups_analysis.py b/exploration/expl_features_groups_analysis.py index 693a45d..7810972 100644 --- a/exploration/expl_features_groups_analysis.py +++ b/exploration/expl_features_groups_analysis.py @@ -170,7 +170,7 @@ make_predictions_with_sensor_groups(model_input.copy(), groups_substrings=big_gr # %% [markdown] # ### Phone sensor groups # make_predictions_with_sensor_groups(model_input.copy(), groups_substrings="_", include_group=True) -# phone_sensors = ["phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery", "phone_calls_", "phone_data_yield_", +# phone_sensors = ["phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery", "phone_calls_", # "phone_light_", "phone_location_", "phone_messages", "phone_screen_", "phone_speech_"] # make_predictions_with_sensor_groups(model_input.copy(), groups_substrings=phone_sensors, include_group=False) @@ -178,7 +178,7 @@ make_predictions_with_sensor_groups(model_input.copy(), groups_substrings=big_gr # Write all the sensors (phone, empatica), seperate other (demographical) cols also sensors_features_groups = ["empatica_inter_beat_", "empatica_accelerometer_", "empatica_temperature_", "empatica_electrodermal_", - "phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery_", "phone_calls_", "phone_data_yield_", "phone_light_", + "phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery_", "phone_calls_", "phone_light_", "phone_locations_", "phone_messages", "phone_screen_"] # , "phone_speech_"] # %% def find_sensor_group_features_importance(model_input, sensor_groups_strings): @@ -270,7 +270,7 @@ def plot_sequential_progress_of_feature_addition_scores(xs, y_recall, y_fscore, # %% sensors_features_groups = ["empatica_inter_beat_", "empatica_accelerometer_", "empatica_temperature_", "empatica_electrodermal_", - "phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery_", "phone_calls_", "phone_data_yield_", "phone_light_", + "phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery_", "phone_calls_", "phone_light_", "phone_locations_", "phone_messages", "phone_screen_"] # , "phone_speech_"] # sensors_features_groups = ["phone_", "empatica_", "demo_"] @@ -293,7 +293,7 @@ best_sensor_features = [col for col in model_input if col.startswith(best_sensor best_sensor_features_scores = find_sensor_group_features_importance(model_input, best_sensor_features) -xs, y_recall, y_fscore = sort_tuples_to_lists(best_sensor_features_scores) +xs, y_recall, y_fscore, recall_std, fscore_std = sort_tuples_to_lists(best_sensor_features_scores) # %% [markdown] # ### Visualize best sensor's F1 and recall scores @@ -309,10 +309,11 @@ for i, sensor_group in enumerate(sensor_groups_importance_scores): current_sensor_features = [col for col in model_input if col.startswith(sensor_group[0])] current_sensor_features_scores = find_sensor_group_features_importance(model_input, current_sensor_features) - xs, y_recall, y_fscore = sort_tuples_to_lists(current_sensor_features_scores) - feature_sequence = feature_sequence.append(pd.DataFrame({"sensor_name":sensor_group[0], "feature_sequence": [xs], "recall": [y_recall], "f1_score": [y_fscore]})) + xs, y_recall, y_fscore, recall_std, fscore_std = sort_tuples_to_lists(current_sensor_features_scores) + feature_sequence = feature_sequence.append(pd.DataFrame({"sensor_name":sensor_group[0], "feature_sequence": [xs], "recall": [y_recall], + "f1_score": [y_fscore], "recall_std": [recall_std], "f1_std": [fscore_std]})) - plot_sequential_progress_of_feature_addition_scores(xs, y_recall, y_fscore, + plot_sequential_progress_of_feature_addition_scores(xs, y_recall, y_fscore, recall_std, fscore_std, title=f"Sequential addition of features for {sensor_group[0]} and its F1, and recall scores") feature_sequence.to_excel("all_sensors_sequential_addition_scores.xlsx", index=False)