From 97113fe9ab5023808cfbc62f9172ede63403fc29 Mon Sep 17 00:00:00 2001 From: junos Date: Wed, 31 May 2023 17:27:49 +0200 Subject: [PATCH] Sum up confusion matrix and illustrate use with dummy. --- machine_learning/helper.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/machine_learning/helper.py b/machine_learning/helper.py index 32ecc98..f008716 100644 --- a/machine_learning/helper.py +++ b/machine_learning/helper.py @@ -425,6 +425,15 @@ def confusion_matrix_scorer(clf, X, y): return {"tn": cm[0, 0], "fp": cm[0, 1], "fn": cm[1, 0], "tp": cm[1, 1]} +def aggregate_confusion_matrix(scores_dict: dict) -> pd.DataFrame: + scores_aggregated = aggregate_and_transpose( + pd.DataFrame(scores_dict), statistics=["sum"] + ) + return scores_aggregated[ + ~scores_aggregated.test_metric.isin(["fit_time", "score_time"]) + ] + + def run_all_classification_models( data_x: pd.DataFrame, data_y: pd.DataFrame, @@ -455,10 +464,29 @@ def run_all_classification_models( error_score="raise", scoring=metrics, ) + dummy_confusion_matrix = cross_validate( + dummy_class, + X=data_x, + y=data_y, + groups=data_groups, + cv=cross_validator, + n_jobs=-1, + error_score="raise", + scoring=confusion_matrix_scorer, + ) print("Dummy") scores_df = pd.DataFrame(dummy_score)[test_metrics] scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"]) + scores_df = pd.concat( + [ + scores_df, + aggregate_confusion_matrix(dummy_confusion_matrix).rename( + columns={"sum": "mean"} + # Note: the column is misleadingly renamed to get concise output. + ), + ] + ) scores_df["method"] = "dummy_classifier" scores = pd.concat([scores, scores_df]) del dummy_class