Add save to file code, and todo comment

2023-02-01 15:13:57 +01:00 · 2023-02-01 15:13:57 +01:00 · e3aef2dae7
parent b286753696
commit e3aef2dae7
1 changed files with 12 additions and 1 deletions
--- a/exploration/expl_features_groups_analysis.py
+++ b/exploration/expl_features_groups_analysis.py
@ -271,14 +271,25 @@ plot_sequential_progress_of_feature_addition_scores(xs, y_recall, y_fscore)

 # %%
 # This section iterates over all sensor groups and investigates sequential feature importance feature-by-feature
+# It also saves the sequence of scores for every sensor's features to an Excel file.
+seq_columns = ["sensor_name", "feature_sequence", "recall", "f1_score"]
+feature_sequence = pd.DataFrame(columns=seq_columns)  # one row per sensor group is added in the loop below
+for i, sensor_group in enumerate(sensor_groups_importance_scores):

-for sensor_group in sensor_groups_importance_scores:
    current_sensor_features = [col for col in model_input if col.startswith(sensor_group[0])]
    current_sensor_features_scores = find_sensor_group_features_importance(model_input, current_sensor_features)
    xs, y_recall, y_fscore = sort_tuples_to_lists(current_sensor_features_scores)
+    feature_sequence = pd.concat([feature_sequence, pd.DataFrame({"sensor_name": sensor_group[0], "feature_sequence": [xs], "recall": [y_recall], "f1_score": [y_fscore]})])  # DataFrame.append was removed in pandas 2.0

    plot_sequential_progress_of_feature_addition_scores(xs, y_recall, y_fscore, 
    title=f"Sequential addition of features for {sensor_group[0]} and its F1, and recall scores")

+feature_sequence.to_excel("all_sensors_sequential_addition_scores.xlsx", index=False)

 # %%
+# TODO: add a method that reads the data back from the Excel file written above, and a second
+# method that keeps only the features whose score is at most a threshold [%] below the maximum
+# score (preferably for recall, possibly for F1). The second method should take the threshold as a parameter.
+
+# %%
+