From 4ec371ed96102d4b1f3b85557cb5fc3c388c1cef Mon Sep 17 00:00:00 2001
From: Primoz
Date: Tue, 13 Sep 2022 09:51:03 +0000
Subject: [PATCH] Testing auto-sklearn

---
Notes (ignored by git am / git apply):
  * The added script was edited by hand after export: the train/test split is
    seeded, the duplicate "import sys" at the end is gone, and comments were
    added/translated. The hunk header and diffstat reflect the new line count.
  * The blob id on the "index" line predates these edits and is informational
    only.

 automl_test.py | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 automl_test.py

diff --git a/automl_test.py b/automl_test.py
new file mode 100644
index 00000000..f3b574cd
--- /dev/null
+++ b/automl_test.py
@@ -0,0 +1,67 @@
+from pprint import pprint
+import sklearn.metrics
+import autosklearn.regression
+
+import datetime
+import importlib
+import os
+import sys
+
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+import seaborn as sns
+import yaml
+
+from sklearn import linear_model, svm, kernel_ridge, gaussian_process
+from sklearn.model_selection import LeaveOneGroupOut, cross_val_score, train_test_split
+from sklearn.metrics import mean_squared_error, r2_score
+from sklearn.impute import SimpleImputer
+
+# Fixed seed so the train/test split (and the reported R2 scores) can be reproduced.
+RANDOM_SEED = 42
+
+model_input = pd.read_csv("data/processed/models/population_model/z_input.csv")  # Standardized data
+
+# Drop columns that hold no values at all, then rows that have no target.
+model_input.dropna(axis=1, how="all", inplace=True)
+model_input.dropna(axis=0, how="any", subset=["target"], inplace=True)
+
+# Fill missing categorical values with each column's mode, then one-hot encode them.
+categorical_feature_colnames = ["gender", "startlanguage"]
+categorical_features = model_input[categorical_feature_colnames].copy()
+mode_categorical_features = categorical_features.mode().iloc[0]
+categorical_features = categorical_features.fillna(mode_categorical_features)
+categorical_features = categorical_features.apply(lambda col: col.astype("category"))
+if not categorical_features.empty:
+    categorical_features = pd.get_dummies(categorical_features)
+numerical_features = model_input.drop(categorical_feature_colnames, axis=1)
+model_in = pd.concat([numerical_features, categorical_features], axis=1)
+
+index_columns = ["local_segment", "local_segment_label", "local_segment_start_datetime", "local_segment_end_datetime"]
+model_in.set_index(index_columns, inplace=True)
+
+# NOTE(review): the random row-wise split ignores "pid"; presumably rows of one pid can end up in both sets - confirm.
+X_train, X_test, y_train, y_test = train_test_split(
+    model_in.drop(["target", "pid"], axis=1),
+    model_in["target"],
+    test_size=0.20,
+    random_state=RANDOM_SEED,
+)
+
+# Search budget in seconds: 1200 for the whole task, at most 60 per single model run.
+automl = autosklearn.regression.AutoSklearnRegressor(
+    time_left_for_this_task=1200,
+    per_run_time_limit=60
+)
+automl.fit(X_train, y_train, dataset_name='straw')
+
+print(automl.leaderboard())
+pprint(automl.show_models(), indent=4)
+
+train_predictions = automl.predict(X_train)
+print("Train R2 score:", sklearn.metrics.r2_score(y_train, train_predictions))
+test_predictions = automl.predict(X_test)
+print("Test R2 score:", sklearn.metrics.r2_score(y_test, test_predictions))
+
+sys.exit()