# ---
# jupyter:
#   jupytext:
#     formats: ipynb,py:percent
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.3'
#       jupytext_version: 1.14.5
#   kernelspec:
#     display_name: straw2analysis
#     language: python
#     name: straw2analysis
# ---

# %%
import os
import sys

import pandas as pd

# Put the parent of the current working directory on sys.path BEFORE importing
# the project package below. The original ordering ran the import first, so the
# path fix-up could never help the import it appears to exist for.
nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path:
    sys.path.append(nb_dir)

from machine_learning.helper import (  # noqa: E402  (must follow sys.path setup)
    impute_encode_categorical_features,
    prepare_cross_validator,
    prepare_sklearn_data_format,
    run_all_regression_models,
)

# %%
# Load the model input table; the path is relative to the working directory.
model_input = pd.read_csv(
    "../data/intradaily_30_min_all_targets/input_JCQ_job_demand_mean.csv"
)

# %%
# Keep only the rows whose "local_segment" value contains the substring "daily".
model_input = model_input[model_input["local_segment"].str.contains("daily")]

# %%
CV_METHOD = "logo"  # logo, half_logo, 5kfold

# Project helper; presumably imputes missing values and encodes categorical
# columns -- confirm against machine_learning.helper.
model_input_encoded = impute_encode_categorical_features(model_input)

# %%
# Split the encoded table into features, target and grouping labels, then build
# the cross-validator selected by CV_METHOD.
data_x, data_y, data_groups = prepare_sklearn_data_format(
    model_input_encoded, CV_METHOD
)
cross_validator = prepare_cross_validator(data_x, data_y, data_groups, CV_METHOD)

# %%
data_y.head()

# %%
data_y.tail()

# %%
data_y.shape

# %%
scores = run_all_regression_models(data_x, data_y, data_groups, cross_validator)

# %%
# NOTE(review): the input file above is the JCQ *job demand* target, but the
# results are saved under a "JCQ_supervisor_support" name. This looks like a
# copy-paste leftover -- confirm the intended output file name before relying
# on it. The path is kept unchanged here so downstream consumers do not break.
scores.to_csv(
    f"../presentation/JCQ_supervisor_support_regression_{CV_METHOD}.csv",
    index=False,
)