Retain metric names in final scores.
parent
b756ed5feb
commit
2336edffb6
|
@ -163,6 +163,17 @@ def prepare_cross_validator(
|
||||||
return cv
|
return cv
|
||||||
|
|
||||||
|
|
||||||
|
def aggregate_and_transpose(df: pd.DataFrame, statistics=None) -> pd.DataFrame:
|
||||||
|
if statistics is None:
|
||||||
|
statistics = ["max", "mean"]
|
||||||
|
return (
|
||||||
|
df.agg(statistics)
|
||||||
|
.transpose()
|
||||||
|
.reset_index()
|
||||||
|
.rename(columns={"index": "test_metric"})
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def run_all_regression_models(
|
def run_all_regression_models(
|
||||||
data_x: pd.DataFrame,
|
data_x: pd.DataFrame,
|
||||||
data_y: pd.DataFrame,
|
data_y: pd.DataFrame,
|
||||||
|
@ -171,7 +182,7 @@ def run_all_regression_models(
|
||||||
) -> pd.DataFrame:
|
) -> pd.DataFrame:
|
||||||
metrics = ["r2", "neg_mean_absolute_error", "neg_root_mean_squared_error"]
|
metrics = ["r2", "neg_mean_absolute_error", "neg_root_mean_squared_error"]
|
||||||
test_metrics = ["test_" + metric for metric in metrics]
|
test_metrics = ["test_" + metric for metric in metrics]
|
||||||
scores = pd.DataFrame(columns=["method", "max", "nanmedian"])
|
scores = pd.DataFrame(columns=["method", "metric", "max", "nanmedian"])
|
||||||
|
|
||||||
# Validate models
|
# Validate models
|
||||||
dummy_regr = DummyRegressor(strategy="mean")
|
dummy_regr = DummyRegressor(strategy="mean")
|
||||||
|
@ -188,7 +199,7 @@ def run_all_regression_models(
|
||||||
print("R^2: ", np.nanmedian(dummy_regr_scores["test_r2"]))
|
print("R^2: ", np.nanmedian(dummy_regr_scores["test_r2"]))
|
||||||
|
|
||||||
scores_df = pd.DataFrame(dummy_regr_scores)[test_metrics]
|
scores_df = pd.DataFrame(dummy_regr_scores)[test_metrics]
|
||||||
scores_df = scores_df.agg(["max", np.nanmedian]).transpose()
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", np.nanmedian])
|
||||||
scores_df["method"] = "dummy"
|
scores_df["method"] = "dummy"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del dummy_regr
|
del dummy_regr
|
||||||
|
@ -208,7 +219,7 @@ def run_all_regression_models(
|
||||||
print("R^2: ", np.nanmedian(lin_reg_scores["test_r2"]))
|
print("R^2: ", np.nanmedian(lin_reg_scores["test_r2"]))
|
||||||
|
|
||||||
scores_df = pd.DataFrame(lin_reg_scores)[test_metrics]
|
scores_df = pd.DataFrame(lin_reg_scores)[test_metrics]
|
||||||
scores_df = scores_df.agg(["max", np.nanmedian]).transpose()
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", np.nanmedian])
|
||||||
scores_df["method"] = "linear_reg"
|
scores_df["method"] = "linear_reg"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del lin_reg
|
del lin_reg
|
||||||
|
@ -227,7 +238,7 @@ def run_all_regression_models(
|
||||||
print("Ridge regression")
|
print("Ridge regression")
|
||||||
|
|
||||||
scores_df = pd.DataFrame(ridge_reg_scores)[test_metrics]
|
scores_df = pd.DataFrame(ridge_reg_scores)[test_metrics]
|
||||||
scores_df = scores_df.agg(["max", np.nanmedian]).transpose()
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", np.nanmedian])
|
||||||
scores_df["method"] = "ridge_reg"
|
scores_df["method"] = "ridge_reg"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del ridge_reg
|
del ridge_reg
|
||||||
|
@ -246,7 +257,7 @@ def run_all_regression_models(
|
||||||
print("Lasso regression")
|
print("Lasso regression")
|
||||||
|
|
||||||
scores_df = pd.DataFrame(lasso_reg_score)[test_metrics]
|
scores_df = pd.DataFrame(lasso_reg_score)[test_metrics]
|
||||||
scores_df = scores_df.agg(["max", np.nanmedian]).transpose()
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", np.nanmedian])
|
||||||
scores_df["method"] = "lasso_reg"
|
scores_df["method"] = "lasso_reg"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del lasso_reg
|
del lasso_reg
|
||||||
|
@ -265,7 +276,7 @@ def run_all_regression_models(
|
||||||
print("Bayesian Ridge")
|
print("Bayesian Ridge")
|
||||||
|
|
||||||
scores_df = pd.DataFrame(bayesian_ridge_reg_score)[test_metrics]
|
scores_df = pd.DataFrame(bayesian_ridge_reg_score)[test_metrics]
|
||||||
scores_df = scores_df.agg(["max", np.nanmedian]).transpose()
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", np.nanmedian])
|
||||||
scores_df["method"] = "bayesian_ridge"
|
scores_df["method"] = "bayesian_ridge"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del bayesian_ridge_reg
|
del bayesian_ridge_reg
|
||||||
|
@ -284,7 +295,7 @@ def run_all_regression_models(
|
||||||
print("RANSAC (outlier robust regression)")
|
print("RANSAC (outlier robust regression)")
|
||||||
|
|
||||||
scores_df = pd.DataFrame(ransac_reg_score)[test_metrics]
|
scores_df = pd.DataFrame(ransac_reg_score)[test_metrics]
|
||||||
scores_df = scores_df.agg(["max", np.nanmedian]).transpose()
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", np.nanmedian])
|
||||||
scores_df["method"] = "RANSAC"
|
scores_df["method"] = "RANSAC"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del ransac_reg
|
del ransac_reg
|
||||||
|
@ -303,7 +314,7 @@ def run_all_regression_models(
|
||||||
print("Support vector regression")
|
print("Support vector regression")
|
||||||
|
|
||||||
scores_df = pd.DataFrame(svr_score)[test_metrics]
|
scores_df = pd.DataFrame(svr_score)[test_metrics]
|
||||||
scores_df = scores_df.agg(["max", np.nanmedian]).transpose()
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", np.nanmedian])
|
||||||
scores_df["method"] = "SVR"
|
scores_df["method"] = "SVR"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del svr
|
del svr
|
||||||
|
@ -322,7 +333,7 @@ def run_all_regression_models(
|
||||||
print("Kernel Ridge regression")
|
print("Kernel Ridge regression")
|
||||||
|
|
||||||
scores_df = pd.DataFrame(kridge_score)[test_metrics]
|
scores_df = pd.DataFrame(kridge_score)[test_metrics]
|
||||||
scores_df = scores_df.agg(["max", np.nanmedian]).transpose()
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", np.nanmedian])
|
||||||
scores_df["method"] = "kernel_ridge"
|
scores_df["method"] = "kernel_ridge"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del kridge
|
del kridge
|
||||||
|
@ -341,7 +352,7 @@ def run_all_regression_models(
|
||||||
print("Gaussian Process Regression")
|
print("Gaussian Process Regression")
|
||||||
|
|
||||||
scores_df = pd.DataFrame(gpr_score)[test_metrics]
|
scores_df = pd.DataFrame(gpr_score)[test_metrics]
|
||||||
scores_df = scores_df.agg(["max", np.nanmedian]).transpose()
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", np.nanmedian])
|
||||||
scores_df["method"] = "gaussian_proc"
|
scores_df["method"] = "gaussian_proc"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del gpr
|
del gpr
|
||||||
|
@ -360,7 +371,7 @@ def run_all_regression_models(
|
||||||
print("Random Forest Regression")
|
print("Random Forest Regression")
|
||||||
|
|
||||||
scores_df = pd.DataFrame(rfr_score)[test_metrics]
|
scores_df = pd.DataFrame(rfr_score)[test_metrics]
|
||||||
scores_df = scores_df.agg(["max", np.nanmedian]).transpose()
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", np.nanmedian])
|
||||||
scores_df["method"] = "random_forest"
|
scores_df["method"] = "random_forest"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del rfr
|
del rfr
|
||||||
|
@ -379,7 +390,7 @@ def run_all_regression_models(
|
||||||
print("XGBoost Regressor")
|
print("XGBoost Regressor")
|
||||||
|
|
||||||
scores_df = pd.DataFrame(xgb_score)[test_metrics]
|
scores_df = pd.DataFrame(xgb_score)[test_metrics]
|
||||||
scores_df = scores_df.agg(["max", np.nanmedian]).transpose()
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", np.nanmedian])
|
||||||
scores_df["method"] = "XGBoost"
|
scores_df["method"] = "XGBoost"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del xgb
|
del xgb
|
||||||
|
@ -398,7 +409,7 @@ def run_all_regression_models(
|
||||||
print("ADA Boost Regressor")
|
print("ADA Boost Regressor")
|
||||||
|
|
||||||
scores_df = pd.DataFrame(ada_score)[test_metrics]
|
scores_df = pd.DataFrame(ada_score)[test_metrics]
|
||||||
scores_df = scores_df.agg(["max", np.nanmedian]).transpose()
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", np.nanmedian])
|
||||||
scores_df["method"] = "ADA_boost"
|
scores_df["method"] = "ADA_boost"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del ada
|
del ada
|
||||||
|
@ -416,7 +427,7 @@ def run_all_classification_models(
|
||||||
metrics = ["accuracy", "average_precision", "recall", "f1"]
|
metrics = ["accuracy", "average_precision", "recall", "f1"]
|
||||||
test_metrics = ["test_" + metric for metric in metrics]
|
test_metrics = ["test_" + metric for metric in metrics]
|
||||||
|
|
||||||
scores = pd.DataFrame(columns=["method", "max", "mean"])
|
scores = pd.DataFrame(columns=["method", "metric", "max", "mean"])
|
||||||
|
|
||||||
dummy_class = DummyClassifier(strategy="most_frequent")
|
dummy_class = DummyClassifier(strategy="most_frequent")
|
||||||
|
|
||||||
|
@ -433,7 +444,7 @@ def run_all_classification_models(
|
||||||
print("Dummy")
|
print("Dummy")
|
||||||
|
|
||||||
scores_df = pd.DataFrame(dummy_score)[test_metrics]
|
scores_df = pd.DataFrame(dummy_score)[test_metrics]
|
||||||
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
||||||
scores_df["method"] = "Dummy"
|
scores_df["method"] = "Dummy"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del dummy_class
|
del dummy_class
|
||||||
|
@ -453,7 +464,7 @@ def run_all_classification_models(
|
||||||
print("Logistic regression")
|
print("Logistic regression")
|
||||||
|
|
||||||
scores_df = pd.DataFrame(log_reg_scores)[test_metrics]
|
scores_df = pd.DataFrame(log_reg_scores)[test_metrics]
|
||||||
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
||||||
scores_df["method"] = "logistic_reg"
|
scores_df["method"] = "logistic_reg"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del logistic_regression
|
del logistic_regression
|
||||||
|
@ -473,7 +484,7 @@ def run_all_classification_models(
|
||||||
print("Support Vector Machine")
|
print("Support Vector Machine")
|
||||||
|
|
||||||
scores_df = pd.DataFrame(svc_scores)[test_metrics]
|
scores_df = pd.DataFrame(svc_scores)[test_metrics]
|
||||||
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
||||||
scores_df["method"] = "svc"
|
scores_df["method"] = "svc"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del svc
|
del svc
|
||||||
|
@ -493,7 +504,7 @@ def run_all_classification_models(
|
||||||
print("Gaussian Naive Bayes")
|
print("Gaussian Naive Bayes")
|
||||||
|
|
||||||
scores_df = pd.DataFrame(gaussian_nb_scores)[test_metrics]
|
scores_df = pd.DataFrame(gaussian_nb_scores)[test_metrics]
|
||||||
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
||||||
scores_df["method"] = "gaussian_naive_bayes"
|
scores_df["method"] = "gaussian_naive_bayes"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del gaussian_nb
|
del gaussian_nb
|
||||||
|
@ -513,7 +524,7 @@ def run_all_classification_models(
|
||||||
print("Stochastic Gradient Descent")
|
print("Stochastic Gradient Descent")
|
||||||
|
|
||||||
scores_df = pd.DataFrame(sgdc_scores)[test_metrics]
|
scores_df = pd.DataFrame(sgdc_scores)[test_metrics]
|
||||||
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
||||||
scores_df["method"] = "stochastic_gradient_descent"
|
scores_df["method"] = "stochastic_gradient_descent"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del sgdc
|
del sgdc
|
||||||
|
@ -533,7 +544,7 @@ def run_all_classification_models(
|
||||||
print("Random Forest")
|
print("Random Forest")
|
||||||
|
|
||||||
scores_df = pd.DataFrame(rfc_scores)[test_metrics]
|
scores_df = pd.DataFrame(rfc_scores)[test_metrics]
|
||||||
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
||||||
scores_df["method"] = "random_forest"
|
scores_df["method"] = "random_forest"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del rfc
|
del rfc
|
||||||
|
@ -553,7 +564,7 @@ def run_all_classification_models(
|
||||||
print("XGBoost")
|
print("XGBoost")
|
||||||
|
|
||||||
scores_df = pd.DataFrame(xgb_scores)[test_metrics]
|
scores_df = pd.DataFrame(xgb_scores)[test_metrics]
|
||||||
scores_df = scores_df.agg(["max", "mean"]).transpose()
|
scores_df = aggregate_and_transpose(scores_df, statistics=["max", "mean"])
|
||||||
scores_df["method"] = "xgboost"
|
scores_df["method"] = "xgboost"
|
||||||
scores = pd.concat([scores, scores_df])
|
scores = pd.concat([scores, scores_df])
|
||||||
del xgb_classifier
|
del xgb_classifier
|
||||||
|
|
Loading…
Reference in New Issue