From 9884b383cf6e1b339738deea8cc54a6d264e5c87 Mon Sep 17 00:00:00 2001 From: Primoz Date: Mon, 10 Oct 2022 16:45:38 +0000 Subject: [PATCH] Testing new data with AutoML. --- automl_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/automl_test.py b/automl_test.py index 405da670..3e2e3b84 100644 --- a/automl_test.py +++ b/automl_test.py @@ -18,12 +18,13 @@ from sklearn.model_selection import LeaveOneGroupOut, cross_val_score, train_tes from sklearn.metrics import mean_squared_error, r2_score from sklearn.impute import SimpleImputer -model_input = pd.read_csv("data/processed/models/population_model/z_input.csv") # Standardizirani podatki +model_input = pd.read_csv("data/processed/models/population_model/input_PANAS_negative_affect_mean.csv") # Standardizirani podatki model_input.dropna(axis=1, how="all", inplace=True) model_input.dropna(axis=0, how="any", subset=["target"], inplace=True) categorical_feature_colnames = ["gender", "startlanguage"] +categorical_feature_colnames += [col for col in model_input.columns if "mostcommonactivity" in col or "homelabel" in col] categorical_features = model_input[categorical_feature_colnames].copy() mode_categorical_features = categorical_features.mode().iloc[0] categorical_features = categorical_features.fillna(mode_categorical_features) @@ -39,7 +40,7 @@ model_in.set_index(index_columns, inplace=True) X_train, X_test, y_train, y_test = train_test_split(model_in.drop(["target", "pid"], axis=1), model_in["target"], test_size=0.30) automl = autosklearn.regression.AutoSklearnRegressor( - time_left_for_this_task=14400, + time_left_for_this_task=7200, per_run_time_limit=120 ) automl.fit(X_train, y_train, dataset_name='straw')