Remove data_yield from features.

2023-02-06 11:16:53 +01:00 · 2023-02-06 11:16:53 +01:00 · afeb7b4872
parent ea3f805ba7
commit afeb7b4872
1 changed files with 8 additions and 7 deletions
--- a/exploration/expl_features_groups_analysis.py
+++ b/exploration/expl_features_groups_analysis.py
@@ -170,7 +170,7 @@ make_predictions_with_sensor_groups(model_input.copy(), groups_substrings=big_gr
 # %% [markdown]
 # ### Phone sensor groups
 # make_predictions_with_sensor_groups(model_input.copy(), groups_substrings="_", include_group=True)
-# phone_sensors = ["phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery", "phone_calls_", "phone_data_yield_", 
+# phone_sensors = ["phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery", "phone_calls_", 
 #                 "phone_light_", "phone_location_", "phone_messages", "phone_screen_", "phone_speech_"]
 # make_predictions_with_sensor_groups(model_input.copy(), groups_substrings=phone_sensors, include_group=False)

@@ -178,7 +178,7 @@ make_predictions_with_sensor_groups(model_input.copy(), groups_substrings=big_gr
 # Write all the sensors  (phone, empatica), seperate other (demographical) cols also

 sensors_features_groups = ["empatica_inter_beat_", "empatica_accelerometer_", "empatica_temperature_", "empatica_electrodermal_",
-                        "phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery_", "phone_calls_", "phone_data_yield_", "phone_light_",
+                        "phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery_", "phone_calls_", "phone_light_",
                        "phone_locations_", "phone_messages", "phone_screen_"] # , "phone_speech_"]
 # %%
 def find_sensor_group_features_importance(model_input, sensor_groups_strings):
@@ -270,7 +270,7 @@ def plot_sequential_progress_of_feature_addition_scores(xs, y_recall, y_fscore,

 # %%
 sensors_features_groups = ["empatica_inter_beat_", "empatica_accelerometer_", "empatica_temperature_", "empatica_electrodermal_",
-                        "phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery_", "phone_calls_", "phone_data_yield_", "phone_light_",
+                        "phone_activity_", "phone_applications_", "phone_bluetooth_", "phone_battery_", "phone_calls_", "phone_light_",
                        "phone_locations_", "phone_messages", "phone_screen_"] # , "phone_speech_"]

 # sensors_features_groups = ["phone_", "empatica_", "demo_"]
@@ -293,7 +293,7 @@ best_sensor_features = [col for col in model_input if col.startswith(best_sensor

 best_sensor_features_scores = find_sensor_group_features_importance(model_input, best_sensor_features)

-xs, y_recall, y_fscore = sort_tuples_to_lists(best_sensor_features_scores)
+xs, y_recall, y_fscore, recall_std, fscore_std = sort_tuples_to_lists(best_sensor_features_scores)

 # %% [markdown]
 # ### Visualize best sensor's F1 and recall scores
@@ -309,10 +309,11 @@ for i, sensor_group in enumerate(sensor_groups_importance_scores):

    current_sensor_features = [col for col in model_input if col.startswith(sensor_group[0])]
    current_sensor_features_scores = find_sensor_group_features_importance(model_input, current_sensor_features)
-    xs, y_recall, y_fscore = sort_tuples_to_lists(current_sensor_features_scores)
-    feature_sequence = feature_sequence.append(pd.DataFrame({"sensor_name":sensor_group[0], "feature_sequence": [xs], "recall": [y_recall], "f1_score": [y_fscore]}))
+    xs, y_recall, y_fscore, recall_std, fscore_std = sort_tuples_to_lists(current_sensor_features_scores)
+    feature_sequence = feature_sequence.append(pd.DataFrame({"sensor_name":sensor_group[0], "feature_sequence": [xs], "recall": [y_recall], 
+                                                             "f1_score": [y_fscore], "recall_std": [recall_std], "f1_std": [fscore_std]}))

-    plot_sequential_progress_of_feature_addition_scores(xs, y_recall, y_fscore, 
+    plot_sequential_progress_of_feature_addition_scores(xs, y_recall, y_fscore, recall_std, fscore_std, 
    title=f"Sequential addition of features for {sensor_group[0]} and its F1, and recall scores")

 feature_sequence.to_excel("all_sensors_sequential_addition_scores.xlsx", index=False)