Thoroughly refactor classification runner.
parent
35c09374dd
commit
91e7352480
|
@ -407,7 +407,12 @@ def run_all_regression_models(
|
||||||
return scores
|
return scores
|
||||||
|
|
||||||
|
|
||||||
def run_all_classification_models(data_x, data_y, data_groups, cv_method):
|
def run_all_classification_models(
|
||||||
|
data_x: pd.DataFrame,
|
||||||
|
data_y: pd.DataFrame,
|
||||||
|
data_groups: pd.DataFrame,
|
||||||
|
cross_validator: BaseCrossValidator,
|
||||||
|
):
|
||||||
metrics = ["accuracy", "average_precision", "recall", "f1"]
|
metrics = ["accuracy", "average_precision", "recall", "f1"]
|
||||||
test_metrics = ["test_" + metric for metric in metrics]
|
test_metrics = ["test_" + metric for metric in metrics]
|
||||||
|
|
||||||
|
@ -420,7 +425,7 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
|
||||||
X=data_x,
|
X=data_x,
|
||||||
y=data_y,
|
y=data_y,
|
||||||
groups=data_groups,
|
groups=data_groups,
|
||||||
cv=cv_method,
|
cv=cross_validator,
|
||||||
n_jobs=-1,
|
n_jobs=-1,
|
||||||
error_score="raise",
|
error_score="raise",
|
||||||
scoring=metrics,
|
scoring=metrics,
|
||||||
|
@ -431,6 +436,8 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
|
||||||
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
||||||
scores_df["method"] = "Dummy"
|
scores_df["method"] = "Dummy"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
|
del dummy_class
|
||||||
|
del dummy_score
|
||||||
|
|
||||||
logistic_regression = linear_model.LogisticRegression()
|
logistic_regression = linear_model.LogisticRegression()
|
||||||
|
|
||||||
|
@ -439,7 +446,7 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
|
||||||
X=data_x,
|
X=data_x,
|
||||||
y=data_y,
|
y=data_y,
|
||||||
groups=data_groups,
|
groups=data_groups,
|
||||||
cv=cv_method,
|
cv=cross_validator,
|
||||||
n_jobs=-1,
|
n_jobs=-1,
|
||||||
scoring=metrics,
|
scoring=metrics,
|
||||||
)
|
)
|
||||||
|
@ -449,6 +456,8 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
|
||||||
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
||||||
scores_df["method"] = "logistic_reg"
|
scores_df["method"] = "logistic_reg"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
|
del logistic_regression
|
||||||
|
del log_reg_scores
|
||||||
|
|
||||||
svc = svm.SVC()
|
svc = svm.SVC()
|
||||||
|
|
||||||
|
@ -457,7 +466,7 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
|
||||||
X=data_x,
|
X=data_x,
|
||||||
y=data_y,
|
y=data_y,
|
||||||
groups=data_groups,
|
groups=data_groups,
|
||||||
cv=cv_method,
|
cv=cross_validator,
|
||||||
n_jobs=-1,
|
n_jobs=-1,
|
||||||
scoring=metrics,
|
scoring=metrics,
|
||||||
)
|
)
|
||||||
|
@ -467,6 +476,8 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
|
||||||
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
||||||
scores_df["method"] = "svc"
|
scores_df["method"] = "svc"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
|
del svc
|
||||||
|
del svc_scores
|
||||||
|
|
||||||
gaussian_nb = naive_bayes.GaussianNB()
|
gaussian_nb = naive_bayes.GaussianNB()
|
||||||
|
|
||||||
|
@ -475,7 +486,7 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
|
||||||
X=data_x,
|
X=data_x,
|
||||||
y=data_y,
|
y=data_y,
|
||||||
groups=data_groups,
|
groups=data_groups,
|
||||||
cv=cv_method,
|
cv=cross_validator,
|
||||||
n_jobs=-1,
|
n_jobs=-1,
|
||||||
scoring=metrics,
|
scoring=metrics,
|
||||||
)
|
)
|
||||||
|
@ -485,6 +496,8 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
|
||||||
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
||||||
scores_df["method"] = "gaussian_naive_bayes"
|
scores_df["method"] = "gaussian_naive_bayes"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
|
del gaussian_nb
|
||||||
|
del gaussian_nb_scores
|
||||||
|
|
||||||
sgdc = linear_model.SGDClassifier()
|
sgdc = linear_model.SGDClassifier()
|
||||||
|
|
||||||
|
@ -493,7 +506,7 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
|
||||||
X=data_x,
|
X=data_x,
|
||||||
y=data_y,
|
y=data_y,
|
||||||
groups=data_groups,
|
groups=data_groups,
|
||||||
cv=cv_method,
|
cv=cross_validator,
|
||||||
n_jobs=-1,
|
n_jobs=-1,
|
||||||
scoring=metrics,
|
scoring=metrics,
|
||||||
)
|
)
|
||||||
|
@ -503,6 +516,8 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
|
||||||
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
||||||
scores_df["method"] = "stochastic_gradient_descent"
|
scores_df["method"] = "stochastic_gradient_descent"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
|
del sgdc
|
||||||
|
del sgdc_scores
|
||||||
|
|
||||||
rfc = ensemble.RandomForestClassifier()
|
rfc = ensemble.RandomForestClassifier()
|
||||||
|
|
||||||
|
@ -511,7 +526,7 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
|
||||||
X=data_x,
|
X=data_x,
|
||||||
y=data_y,
|
y=data_y,
|
||||||
groups=data_groups,
|
groups=data_groups,
|
||||||
cv=cv_method,
|
cv=cross_validator,
|
||||||
n_jobs=-1,
|
n_jobs=-1,
|
||||||
scoring=metrics,
|
scoring=metrics,
|
||||||
)
|
)
|
||||||
|
@ -521,6 +536,8 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
|
||||||
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
||||||
scores_df["method"] = "random_forest"
|
scores_df["method"] = "random_forest"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
|
del rfc
|
||||||
|
del rfc_scores
|
||||||
|
|
||||||
xgb_classifier = XGBClassifier()
|
xgb_classifier = XGBClassifier()
|
||||||
|
|
||||||
|
@ -529,7 +546,7 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
|
||||||
X=data_x,
|
X=data_x,
|
||||||
y=data_y,
|
y=data_y,
|
||||||
groups=data_groups,
|
groups=data_groups,
|
||||||
cv=cv_method,
|
cv=cross_validator,
|
||||||
n_jobs=-1,
|
n_jobs=-1,
|
||||||
scoring=metrics,
|
scoring=metrics,
|
||||||
)
|
)
|
||||||
|
@ -539,5 +556,7 @@ def run_all_classification_models(data_x, data_y, data_groups, cv_method):
|
||||||
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
||||||
scores_df["method"] = "xgboost"
|
scores_df["method"] = "xgboost"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
|
del xgb_classifier
|
||||||
|
del xgb_scores
|
||||||
|
|
||||||
return scores
|
return scores
|
||||||
|
|
Loading…
Reference in New Issue