From 112d96871590aa792168d0640fd1aff23cc9da84 Mon Sep 17 00:00:00 2001 From: junos Date: Wed, 31 May 2023 22:25:39 +0200 Subject: [PATCH] Add baseline features. --- .../ml_pipeline_classification_composite.py | 30 ++++++++++++++----- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/exploration/ml_pipeline_classification_composite.py b/exploration/ml_pipeline_classification_composite.py index 9c67269..97ca903 100644 --- a/exploration/ml_pipeline_classification_composite.py +++ b/exploration/ml_pipeline_classification_composite.py @@ -46,12 +46,7 @@ print("SEGMENT_LENGTH: " + SEGMENT_LENGTH) PATH_FULL = PATH_BASE / SEGMENT_LENGTH / "features" / "all_sensor_features.csv" -model_input = pd.read_csv(PATH_FULL) - -if SEGMENT_LENGTH == "daily": - DAY_LENGTH = "daily" # or "working" - print(DAY_LENGTH) - model_input = model_input[model_input["local_segment"].str.contains(DAY_LENGTH)] +all_features_with_baseline = pd.read_csv(PATH_FULL) # %% TARGETS = [ @@ -129,8 +124,29 @@ if UNDERSAMPLING: model_input = pd.concat([stress, no_stress], axis=0) -# %% jupyter={"outputs_hidden": false, "source_hidden": false} +# %% +TARGET_VARIABLE = "PANAS_negative_affect" +print("TARGET_VARIABLE: " + TARGET_VARIABLE) + +PATH_FULL_HELP = PATH_BASE / SEGMENT_LENGTH / ("input_" + TARGET_VARIABLE + "_mean.csv") + +model_input_with_baseline = pd.read_csv(PATH_FULL_HELP, index_col="local_segment") + +# %% +baseline_col_names = [ + col for col in model_input_with_baseline.columns if col not in model_input.columns +] +print(baseline_col_names) + +# %% +model_input = model_input.join( + model_input_with_baseline[baseline_col_names], how="left" +) +model_input.reset_index(inplace=True) + +# %% model_input_encoded = impute_encode_categorical_features(model_input) + # %% data_x, data_y, data_groups = prepare_sklearn_data_format( model_input_encoded, CV_METHOD