diff --git a/.idea/codeStyles/Project.xml b/.idea/codeStyles/Project.xml
new file mode 100644
index 0000000..2452f1c
--- /dev/null
+++ b/.idea/codeStyles/Project.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff --git a/.idea/codeStyles/codeStyleConfig.xml b/.idea/codeStyles/codeStyleConfig.xml
new file mode 100644
index 0000000..0f7bc51
--- /dev/null
+++ b/.idea/codeStyles/codeStyleConfig.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/machine_learning/helper.py b/machine_learning/helper.py
index aa4c870..4d4e48c 100644
--- a/machine_learning/helper.py
+++ b/machine_learning/helper.py
@@ -136,14 +136,11 @@ def prepare_regression_model_input(model_input, cv_method="logo"):
return train_x, data_y, data_groups
-def run_all_regression_models(input_csv):
- # Prepare data
- data_x, data_y, data_groups = prepare_regression_model_input(input_csv)
-
+def run_all_regression_models(train_x, data_y, data_groups):
# Prepare cross validation
logo = LeaveOneGroupOut()
logo.get_n_splits(
- data_x,
+ train_x,
data_y,
groups=data_groups,
)
@@ -155,7 +152,7 @@ def run_all_regression_models(input_csv):
dummy_regr = DummyRegressor(strategy="mean")
dummy_regr_scores = cross_validate(
dummy_regr,
- X=data_x,
+ X=train_x,
y=data_y,
groups=data_groups,
cv=logo,
@@ -173,7 +170,7 @@ def run_all_regression_models(input_csv):
lin_reg_rapids = linear_model.LinearRegression()
lin_reg_scores = cross_validate(
lin_reg_rapids,
- X=data_x,
+ X=train_x,
y=data_y,
groups=data_groups,
cv=logo,
@@ -191,7 +188,7 @@ def run_all_regression_models(input_csv):
ridge_reg = linear_model.Ridge(alpha=0.5)
ridge_reg_scores = cross_validate(
ridge_reg,
- X=data_x,
+ X=train_x,
y=data_y,
groups=data_groups,
cv=logo,
@@ -208,7 +205,7 @@ def run_all_regression_models(input_csv):
lasso_reg = linear_model.Lasso(alpha=0.1)
lasso_reg_score = cross_validate(
lasso_reg,
- X=data_x,
+ X=train_x,
y=data_y,
groups=data_groups,
cv=logo,
@@ -225,7 +222,7 @@ def run_all_regression_models(input_csv):
bayesian_ridge_reg = linear_model.BayesianRidge()
bayesian_ridge_reg_score = cross_validate(
bayesian_ridge_reg,
- X=data_x,
+ X=train_x,
y=data_y,
groups=data_groups,
cv=logo,
@@ -242,7 +239,7 @@ def run_all_regression_models(input_csv):
ransac_reg = linear_model.RANSACRegressor()
ransac_reg_score = cross_validate(
ransac_reg,
- X=data_x,
+ X=train_x,
y=data_y,
groups=data_groups,
cv=logo,
@@ -258,7 +255,13 @@ def run_all_regression_models(input_csv):
svr = svm.SVR()
svr_score = cross_validate(
- svr, X=data_x, y=data_y, groups=data_groups, cv=logo, n_jobs=-1, scoring=metrics
+ svr,
+ X=train_x,
+ y=data_y,
+ groups=data_groups,
+ cv=logo,
+ n_jobs=-1,
+ scoring=metrics,
)
print("Support vector regression")
@@ -270,7 +273,7 @@ def run_all_regression_models(input_csv):
kridge = kernel_ridge.KernelRidge()
kridge_score = cross_validate(
kridge,
- X=data_x,
+ X=train_x,
y=data_y,
groups=data_groups,
cv=logo,
@@ -286,7 +289,13 @@ def run_all_regression_models(input_csv):
gpr = gaussian_process.GaussianProcessRegressor()
gpr_score = cross_validate(
- gpr, X=data_x, y=data_y, groups=data_groups, cv=logo, n_jobs=-1, scoring=metrics
+ gpr,
+ X=train_x,
+ y=data_y,
+ groups=data_groups,
+ cv=logo,
+ n_jobs=-1,
+ scoring=metrics,
)
print("Gaussian Process Regression")
@@ -297,7 +306,13 @@ def run_all_regression_models(input_csv):
rfr = ensemble.RandomForestRegressor(max_features=0.3, n_jobs=-1)
rfr_score = cross_validate(
- rfr, X=data_x, y=data_y, groups=data_groups, cv=logo, n_jobs=-1, scoring=metrics
+ rfr,
+ X=train_x,
+ y=data_y,
+ groups=data_groups,
+ cv=logo,
+ n_jobs=-1,
+ scoring=metrics,
)
print("Random Forest Regression")
@@ -308,7 +323,13 @@ def run_all_regression_models(input_csv):
xgb = XGBRegressor()
xgb_score = cross_validate(
- xgb, X=data_x, y=data_y, groups=data_groups, cv=logo, n_jobs=-1, scoring=metrics
+ xgb,
+ X=train_x,
+ y=data_y,
+ groups=data_groups,
+ cv=logo,
+ n_jobs=-1,
+ scoring=metrics,
)
print("XGBoost Regressor")
@@ -319,7 +340,13 @@ def run_all_regression_models(input_csv):
ada = ensemble.AdaBoostRegressor()
ada_score = cross_validate(
- ada, X=data_x, y=data_y, groups=data_groups, cv=logo, n_jobs=-1, scoring=metrics
+ ada,
+ X=train_x,
+ y=data_y,
+ groups=data_groups,
+ cv=logo,
+ n_jobs=-1,
+ scoring=metrics,
)
print("ADA Boost Regressor")