Thoroughly refactor classification runner.

master
junos 2023-05-10 22:50:00 +02:00
parent 35c09374dd
commit 91e7352480
1 changed file with 27 additions and 8 deletions

View File

@@ -407,7 +407,12 @@ def run_all_regression_models(
return scores return scores
def run_all_classification_models(data_x, data_y, data_groups, cv_method): def run_all_classification_models(
data_x: pd.DataFrame,
data_y: pd.DataFrame,
data_groups: pd.DataFrame,
cross_validator: BaseCrossValidator,
):
metrics = ["accuracy", "average_precision", "recall", "f1"] metrics = ["accuracy", "average_precision", "recall", "f1"]
test_metrics = ["test_" + metric for metric in metrics] test_metrics = ["test_" + metric for metric in metrics]
@@ -420,7 +425,7 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
X=data_x, X=data_x,
y=data_y, y=data_y,
groups=data_groups, groups=data_groups,
cv=cv_method, cv=cross_validator,
n_jobs=-1, n_jobs=-1,
error_score="raise", error_score="raise",
scoring=metrics, scoring=metrics,
@@ -431,6 +436,8 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
scores_df = scores_df.agg(["max", "mean"]).transpose() scores_df = scores_df.agg(["max", "mean"]).transpose()
scores_df["method"] = "Dummy" scores_df["method"] = "Dummy"
scores = pd.concat([scores, scores_df]) scores = pd.concat([scores, scores_df])
del dummy_class
del dummy_score
logistic_regression = linear_model.LogisticRegression() logistic_regression = linear_model.LogisticRegression()
@@ -439,7 +446,7 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
X=data_x, X=data_x,
y=data_y, y=data_y,
groups=data_groups, groups=data_groups,
cv=cv_method, cv=cross_validator,
n_jobs=-1, n_jobs=-1,
scoring=metrics, scoring=metrics,
) )
@@ -449,6 +456,8 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
scores_df = scores_df.agg(["max", "mean"]).transpose() scores_df = scores_df.agg(["max", "mean"]).transpose()
scores_df["method"] = "logistic_reg" scores_df["method"] = "logistic_reg"
scores = pd.concat([scores, scores_df]) scores = pd.concat([scores, scores_df])
del logistic_regression
del log_reg_scores
svc = svm.SVC() svc = svm.SVC()
@@ -457,7 +466,7 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
X=data_x, X=data_x,
y=data_y, y=data_y,
groups=data_groups, groups=data_groups,
cv=cv_method, cv=cross_validator,
n_jobs=-1, n_jobs=-1,
scoring=metrics, scoring=metrics,
) )
@@ -467,6 +476,8 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
scores_df = scores_df.agg(["max", "mean"]).transpose() scores_df = scores_df.agg(["max", "mean"]).transpose()
scores_df["method"] = "svc" scores_df["method"] = "svc"
scores = pd.concat([scores, scores_df]) scores = pd.concat([scores, scores_df])
del svc
del svc_scores
gaussian_nb = naive_bayes.GaussianNB() gaussian_nb = naive_bayes.GaussianNB()
@@ -475,7 +486,7 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
X=data_x, X=data_x,
y=data_y, y=data_y,
groups=data_groups, groups=data_groups,
cv=cv_method, cv=cross_validator,
n_jobs=-1, n_jobs=-1,
scoring=metrics, scoring=metrics,
) )
@@ -485,6 +496,8 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
scores_df = scores_df.agg(["max", "mean"]).transpose() scores_df = scores_df.agg(["max", "mean"]).transpose()
scores_df["method"] = "gaussian_naive_bayes" scores_df["method"] = "gaussian_naive_bayes"
scores = pd.concat([scores, scores_df]) scores = pd.concat([scores, scores_df])
del gaussian_nb
del gaussian_nb_scores
sgdc = linear_model.SGDClassifier() sgdc = linear_model.SGDClassifier()
@@ -493,7 +506,7 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
X=data_x, X=data_x,
y=data_y, y=data_y,
groups=data_groups, groups=data_groups,
cv=cv_method, cv=cross_validator,
n_jobs=-1, n_jobs=-1,
scoring=metrics, scoring=metrics,
) )
@@ -503,6 +516,8 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
scores_df = scores_df.agg(["max", "mean"]).transpose() scores_df = scores_df.agg(["max", "mean"]).transpose()
scores_df["method"] = "stochastic_gradient_descent" scores_df["method"] = "stochastic_gradient_descent"
scores = pd.concat([scores, scores_df]) scores = pd.concat([scores, scores_df])
del sgdc
del sgdc_scores
rfc = ensemble.RandomForestClassifier() rfc = ensemble.RandomForestClassifier()
@@ -511,7 +526,7 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
X=data_x, X=data_x,
y=data_y, y=data_y,
groups=data_groups, groups=data_groups,
cv=cv_method, cv=cross_validator,
n_jobs=-1, n_jobs=-1,
scoring=metrics, scoring=metrics,
) )
@@ -521,6 +536,8 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
scores_df = scores_df.agg(["max", "mean"]).transpose() scores_df = scores_df.agg(["max", "mean"]).transpose()
scores_df["method"] = "random_forest" scores_df["method"] = "random_forest"
scores = pd.concat([scores, scores_df]) scores = pd.concat([scores, scores_df])
del rfc
del rfc_scores
xgb_classifier = XGBClassifier() xgb_classifier = XGBClassifier()
@@ -529,7 +546,7 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
X=data_x, X=data_x,
y=data_y, y=data_y,
groups=data_groups, groups=data_groups,
cv=cv_method, cv=cross_validator,
n_jobs=-1, n_jobs=-1,
scoring=metrics, scoring=metrics,
) )
@@ -539,5 +556,7 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
scores_df = scores_df.agg(["max", "mean"]).transpose() scores_df = scores_df.agg(["max", "mean"]).transpose()
scores_df["method"] = "xgboost" scores_df["method"] = "xgboost"
scores = pd.concat([scores, scores_df]) scores = pd.concat([scores, scores_df])
del xgb_classifier
del xgb_scores
return scores return scores