Sum up confusion matrix and illustrate use with dummy.

master
junos 2023-05-31 17:27:49 +02:00
parent bc78a1d498
commit 97113fe9ab
1 changed files with 28 additions and 0 deletions

View File

@ -425,6 +425,15 @@ def confusion_matrix_scorer(clf, X, y):
return {"tn": cm[0, 0], "fp": cm[0, 1], "fn": cm[1, 0], "tp": cm[1, 1]} return {"tn": cm[0, 0], "fp": cm[0, 1], "fn": cm[1, 0], "tp": cm[1, 1]}
def aggregate_confusion_matrix(scores_dict: dict) -> pd.DataFrame:
    """Sum confusion-matrix scores and drop the timing rows.

    The scores are loaded into a DataFrame and passed to
    ``aggregate_and_transpose`` with ``"sum"`` as the only statistic.
    Rows whose ``test_metric`` is ``"fit_time"`` or ``"score_time"`` are
    then removed from the result.

    Parameters
    ----------
    scores_dict : dict
        Scores to aggregate; presumably the dictionary returned by
        ``cross_validate`` when scored with ``confusion_matrix_scorer``
        (verify against the caller).

    Returns
    -------
    pd.DataFrame
        The aggregated scores without the timing rows.
    """
    timing_metrics = ["fit_time", "score_time"]
    summed_scores = aggregate_and_transpose(
        pd.DataFrame(scores_dict), statistics=["sum"]
    )
    # Timing entries are not part of the confusion matrix, so mask them out.
    is_timing_row = summed_scores.test_metric.isin(timing_metrics)
    return summed_scores[~is_timing_row]
def run_all_classification_models( def run_all_classification_models(
data_x: pd.DataFrame, data_x: pd.DataFrame,
data_y: pd.DataFrame, data_y: pd.DataFrame,
@ -455,10 +464,29 @@ def run_all_classification_models(
error_score="raise", error_score="raise",
scoring=metrics, scoring=metrics,
) )
dummy_confusion_matrix = cross_validate(
dummy_class,
X=data_x,
y=data_y,
groups=data_groups,
cv=cross_validator,
n_jobs=-1,
error_score="raise",
scoring=confusion_matrix_scorer,
)
print("Dummy") print("Dummy")
scores_df = pd.DataFrame(dummy_score)[test_metrics] scores_df = pd.DataFrame(dummy_score)[test_metrics]
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"]) scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
scores_df = pd.concat(
[
scores_df,
aggregate_confusion_matrix(dummy_confusion_matrix).rename(
columns={"sum": "mean"}
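# Note: these values are sums across folds, not means; the column is renamed to "mean" only so they share a column with the scores above and the output stays concise.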
),
]
)
scores_df["method"] = "dummy_classifier" scores_df["method"] = "dummy_classifier"
scores = pd.concat([scores, scores_df]) scores = pd.concat([scores, scores_df])
del dummy_class del dummy_class