Add save to file code, and todo comment

ml_pipeline
Primoz 2023-02-01 15:13:57 +01:00
parent b286753696
commit e3aef2dae7
1 changed file with 12 additions and 1 deletion

View File

@ -271,14 +271,25 @@ plot_sequential_progress_of_feature_addition_scores(xs, y_recall, y_fscore)
# %%
# This section iterates over all sensor groups and investigates sequential feature importance feature-by-feature.
# It also saves the sequence of scores for all sensors' features in an Excel file.
seq_columns = ["sensor_name", "feature_sequence", "recall", "f1_score"]

# Rows are gathered in a plain list and converted to a DataFrame once, after the loop.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and it copied
# the whole frame on every call, so per-iteration appending was also needlessly quadratic.
sequence_rows = []
for sensor_group in sensor_groups_importance_scores:
    # The first element of each sensor group is used as the column-name prefix of that sensor's features.
    current_sensor_features = [col for col in model_input if col.startswith(sensor_group[0])]
    current_sensor_features_scores = find_sensor_group_features_importance(model_input, current_sensor_features)
    xs, y_recall, y_fscore = sort_tuples_to_lists(current_sensor_features_scores)
    # One row per sensor; each sequence is stored whole in a single cell.
    sequence_rows.append(
        {
            "sensor_name": sensor_group[0],
            "feature_sequence": xs,
            "recall": y_recall,
            "f1_score": y_fscore,
        }
    )
    plot_sequential_progress_of_feature_addition_scores(
        xs,
        y_recall,
        y_fscore,
        title=f"Sequential addition of features for {sensor_group[0]} and its F1, and recall scores",
    )

# Passing `columns` keeps the column order fixed and yields a correctly-shaped empty frame
# when there are no sensor groups.
feature_sequence = pd.DataFrame(sequence_rows, columns=seq_columns)
feature_sequence.to_excel("all_sensors_sequential_addition_scores.xlsx", index=False)
# %% # %%
# TODO: add a method that reads the data from the Excel file written above, and a method
# that selects only the features whose score is at most a threshold [%] below the maximum
# value (best for recall, possibly for F1). The latter should take the threshold as a parameter.
# %%