Retain metric names in final scores.
parent
b756ed5feb
commit
2336edffb6
|
@ -163,6 +163,17 @@ def prepare_cross_validator(
|
|||
return cv
|
||||
|
||||
|
||||
def aggregate_and_transpose(df: pd.DataFrame, statistics=None) -> pd.DataFrame:
    """Summarise every column of *df* and return one row per column.

    Each column of ``df`` is reduced with every entry of ``statistics``;
    the result is transposed so that the original column labels become
    rows, and those labels are then exposed as a regular column named
    ``"test_metric"``.

    Args:
        df: Frame whose columns are to be aggregated.
        statistics: Aggregations accepted by ``DataFrame.agg`` (names or
            callables). Defaults to ``["max", "mean"]`` when ``None``.

    Returns:
        A frame with a ``"test_metric"`` column holding the original
        column labels, followed by one column per statistic.

    NOTE(review): the callers visible in this file pre-declare a column
    called ``"metric"`` before concatenating this result, while this
    helper emits ``"test_metric"`` -- confirm which name is intended.
    """
    if statistics is None:
        statistics = ["max", "mean"]

    summary = df.agg(statistics).transpose()
    # Name the index explicitly instead of renaming the "index" column
    # after reset_index(): reset_index() only uses the literal label
    # "index" when the index is unnamed, so a named columns axis on the
    # input would otherwise leave the output without "test_metric".
    return summary.rename_axis(index="test_metric").reset_index()
|
||||
|
||||
|
||||
def run_all_regression_models(
|
||||
data_x: pd.DataFrame,
|
||||
data_y: pd.DataFrame,
|
||||
|
@ -171,7 +182,7 @@ def run_all_regression_models(
|
|||
) -> pd.DataFrame:
|
||||
metrics = ["r2", "neg_mean_absolute_error", "neg_root_mean_squared_error"]
|
||||
test_metrics = ["test_" + metric for metric in metrics]
|
||||
scores = pd.DataFrame(columns=["method", "max", "nanmedian"])
|
||||
scores = pd.DataFrame(columns=["method", "metric", "max", "nanmedian"])
|
||||
|
||||
# Validate models
|
||||
dummy_regr = DummyRegressor(strategy="mean")
|
||||
|
@ -188,7 +199,7 @@ def run_all_regression_models(
|
|||
print("R^2: ", np.nanmedian(dummy_regr_scores["test_r2"]))
|
||||
|
||||
scores_df = pd.DataFrame(dummy_regr_scores)[test_metrics]
|
||||
scores_df = scores_df.agg(["max", np.nanmedian]).transpose()
|
||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", np.nanmedian])
|
||||
scores_df["method"] = "dummy"
|
||||
scores = pd.concat([scores, scores_df])
|
||||
del dummy_regr
|
||||
|
@ -208,7 +219,7 @@ def run_all_regression_models(
|
|||
print("R^2: ", np.nanmedian(lin_reg_scores["test_r2"]))
|
||||
|
||||
scores_df = pd.DataFrame(lin_reg_scores)[test_metrics]
|
||||
scores_df = scores_df.agg(["max", np.nanmedian]).transpose()
|
||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", np.nanmedian])
|
||||
scores_df["method"] = "linear_reg"
|
||||
scores = pd.concat([scores, scores_df])
|
||||
del lin_reg
|
||||
|
@ -227,7 +238,7 @@ def run_all_regression_models(
|
|||
print("Ridge regression")
|
||||
|
||||
scores_df = pd.DataFrame(ridge_reg_scores)[test_metrics]
|
||||
scores_df = scores_df.agg(["max", np.nanmedian]).transpose()
|
||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", np.nanmedian])
|
||||
scores_df["method"] = "ridge_reg"
|
||||
scores = pd.concat([scores, scores_df])
|
||||
del ridge_reg
|
||||
|
@ -246,7 +257,7 @@ def run_all_regression_models(
|
|||
print("Lasso regression")
|
||||
|
||||
scores_df = pd.DataFrame(lasso_reg_score)[test_metrics]
|
||||
scores_df = scores_df.agg(["max", np.nanmedian]).transpose()
|
||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", np.nanmedian])
|
||||
scores_df["method"] = "lasso_reg"
|
||||
scores = pd.concat([scores, scores_df])
|
||||
del lasso_reg
|
||||
|
@ -265,7 +276,7 @@ def run_all_regression_models(
|
|||
print("Bayesian Ridge")
|
||||
|
||||
scores_df = pd.DataFrame(bayesian_ridge_reg_score)[test_metrics]
|
||||
scores_df = scores_df.agg(["max", np.nanmedian]).transpose()
|
||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", np.nanmedian])
|
||||
scores_df["method"] = "bayesian_ridge"
|
||||
scores = pd.concat([scores, scores_df])
|
||||
del bayesian_ridge_reg
|
||||
|
@ -284,7 +295,7 @@ def run_all_regression_models(
|
|||
print("RANSAC (outlier robust regression)")
|
||||
|
||||
scores_df = pd.DataFrame(ransac_reg_score)[test_metrics]
|
||||
scores_df = scores_df.agg(["max", np.nanmedian]).transpose()
|
||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", np.nanmedian])
|
||||
scores_df["method"] = "RANSAC"
|
||||
scores = pd.concat([scores, scores_df])
|
||||
del ransac_reg
|
||||
|
@ -303,7 +314,7 @@ def run_all_regression_models(
|
|||
print("Support vector regression")
|
||||
|
||||
scores_df = pd.DataFrame(svr_score)[test_metrics]
|
||||
scores_df = scores_df.agg(["max", np.nanmedian]).transpose()
|
||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", np.nanmedian])
|
||||
scores_df["method"] = "SVR"
|
||||
scores = pd.concat([scores, scores_df])
|
||||
del svr
|
||||
|
@ -322,7 +333,7 @@ def run_all_regression_models(
|
|||
print("Kernel Ridge regression")
|
||||
|
||||
scores_df = pd.DataFrame(kridge_score)[test_metrics]
|
||||
scores_df = scores_df.agg(["max", np.nanmedian]).transpose()
|
||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", np.nanmedian])
|
||||
scores_df["method"] = "kernel_ridge"
|
||||
scores = pd.concat([scores, scores_df])
|
||||
del kridge
|
||||
|
@ -341,7 +352,7 @@ def run_all_regression_models(
|
|||
print("Gaussian Process Regression")
|
||||
|
||||
scores_df = pd.DataFrame(gpr_score)[test_metrics]
|
||||
scores_df = scores_df.agg(["max", np.nanmedian]).transpose()
|
||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", np.nanmedian])
|
||||
scores_df["method"] = "gaussian_proc"
|
||||
scores = pd.concat([scores, scores_df])
|
||||
del gpr
|
||||
|
@ -360,7 +371,7 @@ def run_all_regression_models(
|
|||
print("Random Forest Regression")
|
||||
|
||||
scores_df = pd.DataFrame(rfr_score)[test_metrics]
|
||||
scores_df = scores_df.agg(["max", np.nanmedian]).transpose()
|
||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", np.nanmedian])
|
||||
scores_df["method"] = "random_forest"
|
||||
scores = pd.concat([scores, scores_df])
|
||||
del rfr
|
||||
|
@ -379,7 +390,7 @@ def run_all_regression_models(
|
|||
print("XGBoost Regressor")
|
||||
|
||||
scores_df = pd.DataFrame(xgb_score)[test_metrics]
|
||||
scores_df = scores_df.agg(["max", np.nanmedian]).transpose()
|
||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", np.nanmedian])
|
||||
scores_df["method"] = "XGBoost"
|
||||
scores = pd.concat([scores, scores_df])
|
||||
del xgb
|
||||
|
@ -398,7 +409,7 @@ def run_all_regression_models(
|
|||
print("ADA Boost Regressor")
|
||||
|
||||
scores_df = pd.DataFrame(ada_score)[test_metrics]
|
||||
scores_df = scores_df.agg(["max", np.nanmedian]).transpose()
|
||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", np.nanmedian])
|
||||
scores_df["method"] = "ADA_boost"
|
||||
scores = pd.concat([scores, scores_df])
|
||||
del ada
|
||||
|
@ -416,7 +427,7 @@ def run_all_classification_models(
|
|||
metrics = ["accuracy", "average_precision", "recall", "f1"]
|
||||
test_metrics = ["test_" + metric for metric in metrics]
|
||||
|
||||
scores = pd.DataFrame(columns=["method", "max", "mean"])
|
||||
scores = pd.DataFrame(columns=["method", "metric", "max", "mean"])
|
||||
|
||||
dummy_class = DummyClassifier(strategy="most_frequent")
|
||||
|
||||
|
@ -433,7 +444,7 @@ def run_all_classification_models(
|
|||
print("Dummy")
|
||||
|
||||
scores_df = pd.DataFrame(dummy_score)[test_metrics]
|
||||
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
||||
scores_df["method"] = "Dummy"
|
||||
scores = pd.concat([scores, scores_df])
|
||||
del dummy_class
|
||||
|
@ -453,7 +464,7 @@ def run_all_classification_models(
|
|||
print("Logistic regression")
|
||||
|
||||
scores_df = pd.DataFrame(log_reg_scores)[test_metrics]
|
||||
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
||||
scores_df["method"] = "logistic_reg"
|
||||
scores = pd.concat([scores, scores_df])
|
||||
del logistic_regression
|
||||
|
@ -473,7 +484,7 @@ def run_all_classification_models(
|
|||
print("Support Vector Machine")
|
||||
|
||||
scores_df = pd.DataFrame(svc_scores)[test_metrics]
|
||||
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
||||
scores_df["method"] = "svc"
|
||||
scores = pd.concat([scores, scores_df])
|
||||
del svc
|
||||
|
@ -493,7 +504,7 @@ def run_all_classification_models(
|
|||
print("Gaussian Naive Bayes")
|
||||
|
||||
scores_df = pd.DataFrame(gaussian_nb_scores)[test_metrics]
|
||||
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
||||
scores_df["method"] = "gaussian_naive_bayes"
|
||||
scores = pd.concat([scores, scores_df])
|
||||
del gaussian_nb
|
||||
|
@ -513,7 +524,7 @@ def run_all_classification_models(
|
|||
print("Stochastic Gradient Descent")
|
||||
|
||||
scores_df = pd.DataFrame(sgdc_scores)[test_metrics]
|
||||
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
||||
scores_df["method"] = "stochastic_gradient_descent"
|
||||
scores = pd.concat([scores, scores_df])
|
||||
del sgdc
|
||||
|
@ -533,7 +544,7 @@ def run_all_classification_models(
|
|||
print("Random Forest")
|
||||
|
||||
scores_df = pd.DataFrame(rfc_scores)[test_metrics]
|
||||
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
||||
scores_df["method"] = "random_forest"
|
||||
scores = pd.concat([scores, scores_df])
|
||||
del rfc
|
||||
|
@ -553,7 +564,7 @@ def run_all_classification_models(
|
|||
print("XGBoost")
|
||||
|
||||
scores_df = pd.DataFrame(xgb_scores)[test_metrics]
|
||||
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
||||
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
||||
scores_df["method"] = "xgboost"
|
||||
scores = pd.concat([scores, scores_df])
|
||||
del xgb_classifier
|
||||
|
|
Loading…
Reference in New Issue