Filter the regression exploration input to rows whose "local_segment" contains
"daily", change CV_METHOD from "half_logo" to "logo", add cells that inspect
data_y, and write the scores to a per-CV_METHOD CSV under ../presentation/.

NOTE(review): line breaks and leading whitespace were reconstructed (standard
jupytext header layout and Black formatting assumed); confirm the context
lines against the target file before applying. Added lines were edited during
review, so the post-image blob hash on the "index" line is no longer exact.

diff --git a/exploration/ml_pipeline_regression.py b/exploration/ml_pipeline_regression.py
index ed27364..3e7177c 100644
--- a/exploration/ml_pipeline_regression.py
+++ b/exploration/ml_pipeline_regression.py
@@ -6,7 +6,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.13.0
+#       jupytext_version: 1.14.5
 #   kernelspec:
 #     display_name: straw2analysis
 #     language: python
@@ -36,7 +36,11 @@ model_input = pd.read_csv(
 )
 
 # %%
-CV_METHOD = "half_logo"  # logo, half_logo, 5kfold
+# na=False drops rows with a missing local_segment instead of raising on a NaN mask.
+model_input = model_input[model_input["local_segment"].str.contains("daily", na=False)]
+
+# %%
+CV_METHOD = "logo"  # logo, half_logo, 5kfold
 model_input_encoded = impute_encode_categorical_features(model_input)
 
 # %%
@@ -44,5 +48,20 @@ data_x, data_y, data_groups = prepare_sklearn_data_format(
     model_input_encoded, CV_METHOD
 )
 cross_validator = prepare_cross_validator(data_x, data_y, data_groups, CV_METHOD)
+# %%
+data_y.head()
+
+# %%
+data_y.tail()
+
+# %%
+data_y.shape
+
 # %%
 scores = run_all_regression_models(data_x, data_y, data_groups, cross_validator)
+
+# %%
+scores.to_csv(
+    f"../presentation/JCQ_supervisor_support_regression_{CV_METHOD}.csv",
+    index=False,
+)