Testing new data with AutoML.
parent
2dc89c083c
commit
9884b383cf
|
@@ -18,12 +18,13 @@ from sklearn.model_selection import LeaveOneGroupOut, cross_val_score, train_tes
|
||||||
from sklearn.metrics import mean_squared_error, r2_score
|
from sklearn.metrics import mean_squared_error, r2_score
|
||||||
from sklearn.impute import SimpleImputer
|
from sklearn.impute import SimpleImputer
|
||||||
|
|
||||||
model_input = pd.read_csv("data/processed/models/population_model/z_input.csv") # Standardizirani podatki
|
model_input = pd.read_csv("data/processed/models/population_model/input_PANAS_negative_affect_mean.csv") # Standardizirani podatki
|
||||||
|
|
||||||
model_input.dropna(axis=1, how="all", inplace=True)
|
model_input.dropna(axis=1, how="all", inplace=True)
|
||||||
model_input.dropna(axis=0, how="any", subset=["target"], inplace=True)
|
model_input.dropna(axis=0, how="any", subset=["target"], inplace=True)
|
||||||
|
|
||||||
categorical_feature_colnames = ["gender", "startlanguage"]
|
categorical_feature_colnames = ["gender", "startlanguage"]
|
||||||
|
categorical_feature_colnames += [col for col in model_input.columns if "mostcommonactivity" in col or "homelabel" in col]
|
||||||
categorical_features = model_input[categorical_feature_colnames].copy()
|
categorical_features = model_input[categorical_feature_colnames].copy()
|
||||||
mode_categorical_features = categorical_features.mode().iloc[0]
|
mode_categorical_features = categorical_features.mode().iloc[0]
|
||||||
categorical_features = categorical_features.fillna(mode_categorical_features)
|
categorical_features = categorical_features.fillna(mode_categorical_features)
|
||||||
|
@@ -39,7 +40,7 @@ model_in.set_index(index_columns, inplace=True)
|
||||||
X_train, X_test, y_train, y_test = train_test_split(model_in.drop(["target", "pid"], axis=1), model_in["target"], test_size=0.30)
|
X_train, X_test, y_train, y_test = train_test_split(model_in.drop(["target", "pid"], axis=1), model_in["target"], test_size=0.30)
|
||||||
|
|
||||||
automl = autosklearn.regression.AutoSklearnRegressor(
|
automl = autosklearn.regression.AutoSklearnRegressor(
|
||||||
time_left_for_this_task=14400,
|
time_left_for_this_task=7200,
|
||||||
per_run_time_limit=120
|
per_run_time_limit=120
|
||||||
)
|
)
|
||||||
automl.fit(X_train, y_train, dataset_name='straw')
|
automl.fit(X_train, y_train, dataset_name='straw')
|
||||||
|
|
Loading…
Reference in New Issue