diff --git a/exploration/ml_pipeline_classification.py b/exploration/ml_pipeline_classification.py index 709cdc2..eca7210 100644 --- a/exploration/ml_pipeline_classification.py +++ b/exploration/ml_pipeline_classification.py @@ -13,7 +13,7 @@ # name: straw2analysis # --- -# %% jupyter={"source_hidden": false, "outputs_hidden": false} +# %% jupyter={"outputs_hidden": false, "source_hidden": false} # %matplotlib inline import os import sys @@ -43,19 +43,20 @@ UNDERSAMPLING = False # (bool) If True this will train and test data on balanced dataset # (using undersampling method) -# %% jupyter={"source_hidden": false, "outputs_hidden": false} +# %% jupyter={"outputs_hidden": false, "source_hidden": false} model_input = pd.read_csv( - "E:/STRAWresults/20230415/daily/input_PANAS_negative_affect_mean.csv" + "E:/STRAWresults/20230415/stress_event/input_appraisal_stressfulness_event_mean.csv" ) # model_input = # model_input[model_input.columns.drop( # list(model_input.filter(regex='empatica_temperature')) # )] +# model_input = model_input[model_input['local_segment'].str.contains("daily")] -# %% jupyter={"source_hidden": false, "outputs_hidden": false} +# %% jupyter={"outputs_hidden": false, "source_hidden": false} model_input["target"].value_counts() -# %% jupyter={"source_hidden": false, "outputs_hidden": false} +# %% jupyter={"outputs_hidden": false, "source_hidden": false} # bins = [-10, 0, 10] # bins for z-scored targets bins = [-1, 0, 4] # bins for stressfulness (0-4) target model_input["target"], edges = pd.cut( @@ -69,7 +70,7 @@ model_input["target"] = ( model_input["target"].value_counts() -# %% jupyter={"source_hidden": false, "outputs_hidden": false} +# %% jupyter={"outputs_hidden": false, "source_hidden": false} # UnderSampling if UNDERSAMPLING: no_stress = model_input[model_input["target"] == 0] @@ -79,7 +80,7 @@ if UNDERSAMPLING: model_input = pd.concat([stress, no_stress], axis=0) -# %% jupyter={"source_hidden": false, "outputs_hidden": false} +# %% jupyter={"outputs_hidden": false, "source_hidden": false} model_input_encoded = impute_encode_categorical_features(model_input) # %% data_x, data_y, data_groups = prepare_sklearn_data_format( @@ -98,6 +99,8 @@ data_y.shape scores = run_all_classification_models(data_x, data_y, data_groups, cross_validator) # %% scores.to_csv( - "../presentation/JCQ_supervisor_support_regression_" + CV_METHOD + ".csv", + "../presentation/appraisal_stressfulness_event_classification_" + + CV_METHOD + + ".csv", index=False, )