Testing new data with AutoML.

notes
Primoz 2022-10-10 16:45:38 +00:00
parent 2dc89c083c
commit 9884b383cf
1 changed file with 3 additions and 2 deletions

View File

@@ -18,12 +18,13 @@ from sklearn.model_selection import LeaveOneGroupOut, cross_val_score, train_tes
from sklearn.metrics import mean_squared_error, r2_score from sklearn.metrics import mean_squared_error, r2_score
from sklearn.impute import SimpleImputer from sklearn.impute import SimpleImputer
model_input = pd.read_csv("data/processed/models/population_model/z_input.csv") # Standardizirani podatki model_input = pd.read_csv("data/processed/models/population_model/input_PANAS_negative_affect_mean.csv") # Standardizirani podatki
model_input.dropna(axis=1, how="all", inplace=True) model_input.dropna(axis=1, how="all", inplace=True)
model_input.dropna(axis=0, how="any", subset=["target"], inplace=True) model_input.dropna(axis=0, how="any", subset=["target"], inplace=True)
categorical_feature_colnames = ["gender", "startlanguage"] categorical_feature_colnames = ["gender", "startlanguage"]
categorical_feature_colnames += [col for col in model_input.columns if "mostcommonactivity" in col or "homelabel" in col]
categorical_features = model_input[categorical_feature_colnames].copy() categorical_features = model_input[categorical_feature_colnames].copy()
mode_categorical_features = categorical_features.mode().iloc[0] mode_categorical_features = categorical_features.mode().iloc[0]
categorical_features = categorical_features.fillna(mode_categorical_features) categorical_features = categorical_features.fillna(mode_categorical_features)
@@ -39,7 +40,7 @@ model_in.set_index(index_columns, inplace=True)
X_train, X_test, y_train, y_test = train_test_split(model_in.drop(["target", "pid"], axis=1), model_in["target"], test_size=0.30) X_train, X_test, y_train, y_test = train_test_split(model_in.drop(["target", "pid"], axis=1), model_in["target"], test_size=0.30)
automl = autosklearn.regression.AutoSklearnRegressor( automl = autosklearn.regression.AutoSklearnRegressor(
time_left_for_this_task=14400, time_left_for_this_task=7200,
per_run_time_limit=120 per_run_time_limit=120
) )
automl.fit(X_train, y_train, dataset_name='straw') automl.fit(X_train, y_train, dataset_name='straw')