Add confusion matrices for all methods.
parent
97113fe9ab
commit
a9af113c9c
|
@ -491,6 +491,7 @@ def run_all_classification_models(
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del dummy_class
|
del dummy_class
|
||||||
del dummy_score
|
del dummy_score
|
||||||
|
del dummy_confusion_matrix
|
||||||
|
|
||||||
logistic_regression = linear_model.LogisticRegression()
|
logistic_regression = linear_model.LogisticRegression()
|
||||||
|
|
||||||
|
@ -503,14 +504,33 @@ def run_all_classification_models(
|
||||||
n_jobs=-1,
|
n_jobs=-1,
|
||||||
scoring=metrics,
|
scoring=metrics,
|
||||||
)
|
)
|
||||||
|
log_reg_confusion_matrix = cross_validate(
|
||||||
|
logistic_regression,
|
||||||
|
X=data_x,
|
||||||
|
y=data_y,
|
||||||
|
groups=data_groups,
|
||||||
|
cv=cross_validator,
|
||||||
|
n_jobs=-1,
|
||||||
|
scoring=confusion_matrix_scorer,
|
||||||
|
)
|
||||||
print("Logistic regression")
|
print("Logistic regression")
|
||||||
|
|
||||||
scores_df = pd.DataFrame(log_reg_scores)[test_metrics]
|
scores_df = pd.DataFrame(log_reg_scores)[test_metrics]
|
||||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
||||||
|
scores_df = pd.concat(
|
||||||
|
[
|
||||||
|
scores_df,
|
||||||
|
aggregate_confusion_matrix(log_reg_confusion_matrix).rename(
|
||||||
|
columns={"sum": "mean"}
|
||||||
|
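# Note: the "sum" column is renamed to "mean" only so it lines up with the "mean" column of the other scores and keeps the output concise; the label is misleading, as these values are not means.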
|
||||||
|
),
|
||||||
|
]
|
||||||
|
)
|
||||||
scores_df["method"] = "logistic_regression"
|
scores_df["method"] = "logistic_regression"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del logistic_regression
|
del logistic_regression
|
||||||
del log_reg_scores
|
del log_reg_scores
|
||||||
|
del log_reg_confusion_matrix
|
||||||
|
|
||||||
svc = svm.SVC()
|
svc = svm.SVC()
|
||||||
|
|
||||||
|
@ -523,14 +543,33 @@ def run_all_classification_models(
|
||||||
n_jobs=-1,
|
n_jobs=-1,
|
||||||
scoring=metrics,
|
scoring=metrics,
|
||||||
)
|
)
|
||||||
|
svc_confusion_matrix = cross_validate(
|
||||||
|
svc,
|
||||||
|
X=data_x,
|
||||||
|
y=data_y,
|
||||||
|
groups=data_groups,
|
||||||
|
cv=cross_validator,
|
||||||
|
n_jobs=-1,
|
||||||
|
scoring=confusion_matrix_scorer,
|
||||||
|
)
|
||||||
print("Support Vector Machine")
|
print("Support Vector Machine")
|
||||||
|
|
||||||
scores_df = pd.DataFrame(svc_scores)[test_metrics]
|
scores_df = pd.DataFrame(svc_scores)[test_metrics]
|
||||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
||||||
|
scores_df = pd.concat(
|
||||||
|
[
|
||||||
|
scores_df,
|
||||||
|
aggregate_confusion_matrix(svc_confusion_matrix).rename(
|
||||||
|
columns={"sum": "mean"}
|
||||||
|
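# Note: the "sum" column is renamed to "mean" only so it lines up with the "mean" column of the other scores and keeps the output concise; the label is misleading, as these values are not means.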
|
||||||
|
),
|
||||||
|
]
|
||||||
|
)
|
||||||
scores_df["method"] = "SVC"
|
scores_df["method"] = "SVC"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del svc
|
del svc
|
||||||
del svc_scores
|
del svc_scores
|
||||||
|
del svc_confusion_matrix
|
||||||
|
|
||||||
gaussian_nb = naive_bayes.GaussianNB()
|
gaussian_nb = naive_bayes.GaussianNB()
|
||||||
|
|
||||||
|
@ -543,14 +582,33 @@ def run_all_classification_models(
|
||||||
n_jobs=-1,
|
n_jobs=-1,
|
||||||
scoring=metrics,
|
scoring=metrics,
|
||||||
)
|
)
|
||||||
|
gaussian_nb_confusion_matrix = cross_validate(
|
||||||
|
gaussian_nb,
|
||||||
|
X=data_x,
|
||||||
|
y=data_y,
|
||||||
|
groups=data_groups,
|
||||||
|
cv=cross_validator,
|
||||||
|
n_jobs=-1,
|
||||||
|
scoring=confusion_matrix_scorer,
|
||||||
|
)
|
||||||
print("Gaussian Naive Bayes")
|
print("Gaussian Naive Bayes")
|
||||||
|
|
||||||
scores_df = pd.DataFrame(gaussian_nb_scores)[test_metrics]
|
scores_df = pd.DataFrame(gaussian_nb_scores)[test_metrics]
|
||||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
||||||
|
scores_df = pd.concat(
|
||||||
|
[
|
||||||
|
scores_df,
|
||||||
|
aggregate_confusion_matrix(gaussian_nb_confusion_matrix).rename(
|
||||||
|
columns={"sum": "mean"}
|
||||||
|
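# Note: the "sum" column is renamed to "mean" only so it lines up with the "mean" column of the other scores and keeps the output concise; the label is misleading, as these values are not means.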
|
||||||
|
),
|
||||||
|
]
|
||||||
|
)
|
||||||
scores_df["method"] = "gaussian_naive_bayes"
|
scores_df["method"] = "gaussian_naive_bayes"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del gaussian_nb
|
del gaussian_nb
|
||||||
del gaussian_nb_scores
|
del gaussian_nb_scores
|
||||||
|
del gaussian_nb_confusion_matrix
|
||||||
|
|
||||||
sgdc = linear_model.SGDClassifier()
|
sgdc = linear_model.SGDClassifier()
|
||||||
|
|
||||||
|
@ -563,14 +621,33 @@ def run_all_classification_models(
|
||||||
n_jobs=-1,
|
n_jobs=-1,
|
||||||
scoring=metrics,
|
scoring=metrics,
|
||||||
)
|
)
|
||||||
|
sgdc_confusion_matrix = cross_validate(
|
||||||
|
sgdc,
|
||||||
|
X=data_x,
|
||||||
|
y=data_y,
|
||||||
|
groups=data_groups,
|
||||||
|
cv=cross_validator,
|
||||||
|
n_jobs=-1,
|
||||||
|
scoring=confusion_matrix_scorer,
|
||||||
|
)
|
||||||
print("Stochastic Gradient Descent")
|
print("Stochastic Gradient Descent")
|
||||||
|
|
||||||
scores_df = pd.DataFrame(sgdc_scores)[test_metrics]
|
scores_df = pd.DataFrame(sgdc_scores)[test_metrics]
|
||||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
||||||
|
scores_df = pd.concat(
|
||||||
|
[
|
||||||
|
scores_df,
|
||||||
|
aggregate_confusion_matrix(sgdc_confusion_matrix).rename(
|
||||||
|
columns={"sum": "mean"}
|
||||||
|
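# Note: the "sum" column is renamed to "mean" only so it lines up with the "mean" column of the other scores and keeps the output concise; the label is misleading, as these values are not means.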
|
||||||
|
),
|
||||||
|
]
|
||||||
|
)
|
||||||
scores_df["method"] = "stochastic_gradient_descent_classifier"
|
scores_df["method"] = "stochastic_gradient_descent_classifier"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del sgdc
|
del sgdc
|
||||||
del sgdc_scores
|
del sgdc_scores
|
||||||
|
del sgdc_confusion_matrix
|
||||||
|
|
||||||
rfc = ensemble.RandomForestClassifier()
|
rfc = ensemble.RandomForestClassifier()
|
||||||
|
|
||||||
|
@ -583,14 +660,33 @@ def run_all_classification_models(
|
||||||
n_jobs=-1,
|
n_jobs=-1,
|
||||||
scoring=metrics,
|
scoring=metrics,
|
||||||
)
|
)
|
||||||
|
rfc_confusion_matrix = cross_validate(
|
||||||
|
rfc,
|
||||||
|
X=data_x,
|
||||||
|
y=data_y,
|
||||||
|
groups=data_groups,
|
||||||
|
cv=cross_validator,
|
||||||
|
n_jobs=-1,
|
||||||
|
scoring=confusion_matrix_scorer,
|
||||||
|
)
|
||||||
print("Random Forest")
|
print("Random Forest")
|
||||||
|
|
||||||
scores_df = pd.DataFrame(rfc_scores)[test_metrics]
|
scores_df = pd.DataFrame(rfc_scores)[test_metrics]
|
||||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
||||||
|
scores_df = pd.concat(
|
||||||
|
[
|
||||||
|
scores_df,
|
||||||
|
aggregate_confusion_matrix(rfc_confusion_matrix).rename(
|
||||||
|
columns={"sum": "mean"}
|
||||||
|
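# Note: the "sum" column is renamed to "mean" only so it lines up with the "mean" column of the other scores and keeps the output concise; the label is misleading, as these values are not means.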
|
||||||
|
),
|
||||||
|
]
|
||||||
|
)
|
||||||
scores_df["method"] = "random_forest_classifier"
|
scores_df["method"] = "random_forest_classifier"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del rfc
|
del rfc
|
||||||
del rfc_scores
|
del rfc_scores
|
||||||
|
del rfc_confusion_matrix
|
||||||
|
|
||||||
xgb_classifier = XGBClassifier()
|
xgb_classifier = XGBClassifier()
|
||||||
|
|
||||||
|
@ -603,13 +699,32 @@ def run_all_classification_models(
|
||||||
n_jobs=-1,
|
n_jobs=-1,
|
||||||
scoring=metrics,
|
scoring=metrics,
|
||||||
)
|
)
|
||||||
|
xgb_confusion_matrix = cross_validate(
|
||||||
|
xgb_classifier,
|
||||||
|
X=data_x,
|
||||||
|
y=data_y,
|
||||||
|
groups=data_groups,
|
||||||
|
cv=cross_validator,
|
||||||
|
n_jobs=-1,
|
||||||
|
scoring=confusion_matrix_scorer,
|
||||||
|
)
|
||||||
print("XGBoost")
|
print("XGBoost")
|
||||||
|
|
||||||
scores_df = pd.DataFrame(xgb_scores)[test_metrics]
|
scores_df = pd.DataFrame(xgb_scores)[test_metrics]
|
||||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
||||||
|
scores_df = pd.concat(
|
||||||
|
[
|
||||||
|
scores_df,
|
||||||
|
aggregate_confusion_matrix(xgb_confusion_matrix).rename(
|
||||||
|
columns={"sum": "mean"}
|
||||||
|
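# Note: the "sum" column is renamed to "mean" only so it lines up with the "mean" column of the other scores and keeps the output concise; the label is misleading, as these values are not means.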
|
||||||
|
),
|
||||||
|
]
|
||||||
|
)
|
||||||
scores_df["method"] = "XGBoost_classifier"
|
scores_df["method"] = "XGBoost_classifier"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del xgb_classifier
|
del xgb_classifier
|
||||||
del xgb_scores
|
del xgb_scores
|
||||||
|
del xgb_confusion_matrix
|
||||||
|
|
||||||
return scores
|
return scores
|
||||||
|
|
Loading…
Reference in New Issue