Add confusion matrices for all methods.

master
junos 2023-05-31 17:41:50 +02:00
parent 97113fe9ab
commit a9af113c9c
1 changed files with 115 additions and 0 deletions

View File

@ -491,6 +491,7 @@ def run_all_classification_models(
scores = pd.concat([scores, scores_df])
del dummy_class
del dummy_score
del dummy_confusion_matrix
logistic_regression = linear_model.LogisticRegression()
@ -503,14 +504,33 @@ def run_all_classification_models(
n_jobs=-1,
scoring=metrics,
)
log_reg_confusion_matrix = cross_validate(
logistic_regression,
X=data_x,
y=data_y,
groups=data_groups,
cv=cross_validator,
n_jobs=-1,
scoring=confusion_matrix_scorer,
)
print("Logistic regression")
scores_df = pd.DataFrame(log_reg_scores)[test_metrics]
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
scores_df = pd.concat(
[
scores_df,
aggregate_confusion_matrix(log_reg_confusion_matrix).rename(
columns={"sum": "mean"}
# Note: the column is misleadingly renamed to get concise output.
),
]
)
scores_df["method"] = "logistic_regression"
scores = pd.concat([scores, scores_df])
del logistic_regression
del log_reg_scores
del log_reg_confusion_matrix
svc = svm.SVC()
@ -523,14 +543,33 @@ def run_all_classification_models(
n_jobs=-1,
scoring=metrics,
)
svc_confusion_matrix = cross_validate(
svc,
X=data_x,
y=data_y,
groups=data_groups,
cv=cross_validator,
n_jobs=-1,
scoring=confusion_matrix_scorer,
)
print("Support Vector Machine")
scores_df = pd.DataFrame(svc_scores)[test_metrics]
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
scores_df = pd.concat(
[
scores_df,
aggregate_confusion_matrix(svc_confusion_matrix).rename(
columns={"sum": "mean"}
# Note: the column is misleadingly renamed to get concise output.
),
]
)
scores_df["method"] = "SVC"
scores = pd.concat([scores, scores_df])
del svc
del svc_scores
del svc_confusion_matrix
gaussian_nb = naive_bayes.GaussianNB()
@ -543,14 +582,33 @@ def run_all_classification_models(
n_jobs=-1,
scoring=metrics,
)
gaussian_nb_confusion_matrix = cross_validate(
gaussian_nb,
X=data_x,
y=data_y,
groups=data_groups,
cv=cross_validator,
n_jobs=-1,
scoring=confusion_matrix_scorer,
)
print("Gaussian Naive Bayes")
scores_df = pd.DataFrame(gaussian_nb_scores)[test_metrics]
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
scores_df = pd.concat(
[
scores_df,
aggregate_confusion_matrix(gaussian_nb_confusion_matrix).rename(
columns={"sum": "mean"}
# Note: the column is misleadingly renamed to get concise output.
),
]
)
scores_df["method"] = "gaussian_naive_bayes"
scores = pd.concat([scores, scores_df])
del gaussian_nb
del gaussian_nb_scores
del gaussian_nb_confusion_matrix
sgdc = linear_model.SGDClassifier()
@ -563,14 +621,33 @@ def run_all_classification_models(
n_jobs=-1,
scoring=metrics,
)
sgdc_confusion_matrix = cross_validate(
sgdc,
X=data_x,
y=data_y,
groups=data_groups,
cv=cross_validator,
n_jobs=-1,
scoring=confusion_matrix_scorer,
)
print("Stochastic Gradient Descent")
scores_df = pd.DataFrame(sgdc_scores)[test_metrics]
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
scores_df = pd.concat(
[
scores_df,
aggregate_confusion_matrix(sgdc_confusion_matrix).rename(
columns={"sum": "mean"}
# Note: the column is misleadingly renamed to get concise output.
),
]
)
scores_df["method"] = "stochastic_gradient_descent_classifier"
scores = pd.concat([scores, scores_df])
del sgdc
del sgdc_scores
del sgdc_confusion_matrix
rfc = ensemble.RandomForestClassifier()
@ -583,14 +660,33 @@ def run_all_classification_models(
n_jobs=-1,
scoring=metrics,
)
rfc_confusion_matrix = cross_validate(
rfc,
X=data_x,
y=data_y,
groups=data_groups,
cv=cross_validator,
n_jobs=-1,
scoring=confusion_matrix_scorer,
)
print("Random Forest")
scores_df = pd.DataFrame(rfc_scores)[test_metrics]
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
scores_df = pd.concat(
[
scores_df,
aggregate_confusion_matrix(rfc_confusion_matrix).rename(
columns={"sum": "mean"}
# Note: the column is misleadingly renamed to get concise output.
),
]
)
scores_df["method"] = "random_forest_classifier"
scores = pd.concat([scores, scores_df])
del rfc
del rfc_scores
del rfc_confusion_matrix
xgb_classifier = XGBClassifier()
@ -603,13 +699,32 @@ def run_all_classification_models(
n_jobs=-1,
scoring=metrics,
)
xgb_confusion_matrix = cross_validate(
xgb_classifier,
X=data_x,
y=data_y,
groups=data_groups,
cv=cross_validator,
n_jobs=-1,
scoring=confusion_matrix_scorer,
)
print("XGBoost")
scores_df = pd.DataFrame(xgb_scores)[test_metrics]
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
scores_df = pd.concat(
[
scores_df,
aggregate_confusion_matrix(xgb_confusion_matrix).rename(
columns={"sum": "mean"}
# Note: the column is misleadingly renamed to get concise output.
),
]
)
scores_df["method"] = "XGBoost_classifier"
scores = pd.concat([scores, scores_df])
del xgb_classifier
del xgb_scores
del xgb_confusion_matrix
return scores