diff --git a/config/environment.yml b/config/environment.yml
index 62cb210..42d947b 100644
--- a/config/environment.yml
+++ b/config/environment.yml
@@ -7,6 +7,7 @@ dependencies:
   - black
   - isort
   - flake8
+  - imbalanced-learn=0.10.0
   - jupyterlab
   - jupytext
   - mypy
diff --git a/exploration/ml_pipeline_classification.py b/exploration/ml_pipeline_classification.py
index 33d1125..f539025 100644
--- a/exploration/ml_pipeline_classification.py
+++ b/exploration/ml_pipeline_classification.py
@@ -43,17 +43,19 @@ if nb_dir not in sys.path:
 # ## Set script's parameters
 cv_method_str = 'logo' # logo, half_logo, 5kfold # Cross-validation method (could be regarded as a hyperparameter)
 n_sl = 1 # Number of largest/smallest accuracies (of particular CV) outputs
+under_sampling = True # (bool) If True, the classes are balanced by undersampling before cross-validation, so both training and test folds come from the balanced dataset
 
 # %% jupyter={"source_hidden": true}
-model_input = pd.read_csv("../data/intradaily_30_min_all_targets/input_JCQ_job_demand_mean.csv")
+model_input = pd.read_csv("../data/stressfulness_event_with_target_0/input_appraisal_stressfulness_event_mean.csv")
 
 # %% jupyter={"source_hidden": true}
 index_columns = ["local_segment", "local_segment_label", "local_segment_start_datetime", "local_segment_end_datetime"]
 model_input.set_index(index_columns, inplace=True)
+model_input['target'].value_counts() # distribution of the raw target values, before they are binned below
 
 # %% jupyter={"source_hidden": true}
-bins = [-10, 0, 10] # bins for z-scored targets
-# bins = [1, 2.5, 4] # bins for stressfulness (1-4) target
+# bins = [-10, 0, 10] # bins for z-scored targets
+bins = [-1, 0, 4] # bins for stressfulness (0-4) target; right-closed intervals: (-1, 0] -> 'low', (0, 4] -> 'high'
 model_input['target'], edges = pd.cut(model_input.target, bins=bins, labels=['low', 'high'], retbins=True, right=True) #['low', 'medium', 'high']
 model_input['target'].value_counts(), edges
 # model_input = model_input[model_input['target'] != "medium"]
@@ -61,6 +63,20 @@ model_input['target'] = model_input['target'].astype(str).apply(lambda x: 0 if x
 
 model_input['target'].value_counts()
 
+# %% jupyter={"source_hidden": true}
+# Undersampling: shrink the larger class to the size of the smaller one so both classes are equally represented.
+if under_sampling:
+    no_stress = model_input[model_input['target'] == 0]
+    stress = model_input[model_input['target'] == 1]
+
+    # sorted() is stable, so on a tie `stress` stays first; the fixed seed keeps the drawn subset reproducible.
+    minority, majority = sorted([stress, no_stress], key=len)
+    majority = majority.sample(n=len(minority), random_state=42)
+    model_input = pd.concat([minority, majority], axis=0)
+
+    print(model_input["target"].value_counts())
+
+# %% jupyter={"source_hidden": true}
 if cv_method_str == 'half_logo':
     model_input['pid_index'] = model_input.groupby('pid').cumcount()
     model_input['pid_count'] = model_input.groupby('pid')['pid'].transform('count')
@@ -119,11 +135,12 @@ dummy_classifier = cross_validate(
     cv=cv_method,
     n_jobs=-1,
     error_score='raise',
-    scoring=('accuracy', 'average_precision', 'recall', 'f1')
+    scoring=('accuracy', 'precision', 'recall', 'f1')
 )
 # %% jupyter={"source_hidden": true}
-print("Acc", np.mean(dummy_classifier['test_accuracy']))
-print("Precision", np.mean(dummy_classifier['test_average_precision']))
+print("Acc (median)", np.nanmedian(dummy_classifier['test_accuracy'])) # median over the CV folds; NaN scores are skipped
+print("Acc (mean)", np.mean(dummy_classifier['test_accuracy']))
+print("Precision", np.mean(dummy_classifier['test_precision'])) # key follows the 'precision' scorer passed to cross_validate
 print("Recall", np.mean(dummy_classifier['test_recall']))
 print("F1", np.mean(dummy_classifier['test_f1']))
 print(f"Largest {n_sl} ACC:", np.sort(-np.partition(-dummy_classifier['test_accuracy'], n_sl)[:n_sl])[::-1])
@@ -146,7 +163,8 @@ log_reg_scores = cross_validate(
     scoring=('accuracy', 'precision', 'recall', 'f1')
 )
 # %% jupyter={"source_hidden": true}
-print("Acc", np.mean(log_reg_scores['test_accuracy']))
+print("Acc (median)", np.nanmedian(log_reg_scores['test_accuracy'])) # median over the CV folds; NaN scores are skipped
+print("Acc (mean)", np.mean(log_reg_scores['test_accuracy']))
 print("Precision", np.mean(log_reg_scores['test_precision']))
 print("Recall", np.mean(log_reg_scores['test_recall']))
 print("F1", np.mean(log_reg_scores['test_f1']))
@@ -170,7 +188,8 @@ svc_scores = cross_validate(
     scoring=('accuracy', 'precision', 'recall', 'f1')
 )
 # %% jupyter={"source_hidden": true}
-print("Acc", np.mean(svc_scores['test_accuracy']))
+print("Acc (median)", np.nanmedian(svc_scores['test_accuracy'])) # median over the CV folds; NaN scores are skipped
+print("Acc (mean)", np.mean(svc_scores['test_accuracy']))
 print("Precision", np.mean(svc_scores['test_precision']))
 print("Recall", np.mean(svc_scores['test_recall']))
 print("F1", np.mean(svc_scores['test_f1']))
@@ -195,7 +214,8 @@ gaussian_nb_scores = cross_validate(
     scoring=('accuracy', 'precision', 'recall', 'f1')
 )
 # %% jupyter={"source_hidden": true}
-print("Acc", np.mean(gaussian_nb_scores['test_accuracy']))
+print("Acc (median)", np.nanmedian(gaussian_nb_scores['test_accuracy'])) # median over the CV folds; NaN scores are skipped
+print("Acc (mean)", np.mean(gaussian_nb_scores['test_accuracy']))
 print("Precision", np.mean(gaussian_nb_scores['test_precision']))
 print("Recall", np.mean(gaussian_nb_scores['test_recall']))
 print("F1", np.mean(gaussian_nb_scores['test_f1']))
@@ -220,7 +240,8 @@ sgdc_scores = cross_validate(
     scoring=('accuracy', 'precision', 'recall', 'f1')
 )
 # %% jupyter={"source_hidden": true}
-print("Acc", np.mean(sgdc_scores['test_accuracy']))
+print("Acc (median)", np.nanmedian(sgdc_scores['test_accuracy'])) # median over the CV folds; NaN scores are skipped
+print("Acc (mean)", np.mean(sgdc_scores['test_accuracy']))
 print("Precision", np.mean(sgdc_scores['test_precision']))
 print("Recall", np.mean(sgdc_scores['test_recall']))
 print("F1", np.mean(sgdc_scores['test_f1']))
@@ -245,7 +266,8 @@ knn_scores = cross_validate(
     scoring=('accuracy', 'precision', 'recall', 'f1')
 )
 # %% jupyter={"source_hidden": true}
-print("Acc", np.mean(knn_scores['test_accuracy']))
+print("Acc (median)", np.nanmedian(knn_scores['test_accuracy'])) # median over the CV folds; NaN scores are skipped
+print("Acc (mean)", np.mean(knn_scores['test_accuracy']))
 print("Precision", np.mean(knn_scores['test_precision']))
 print("Recall", np.mean(knn_scores['test_recall']))
 print("F1", np.mean(knn_scores['test_f1']))
@@ -270,7 +292,8 @@ dtree_scores = cross_validate(
     scoring=('accuracy', 'precision', 'recall', 'f1')
 )
 # %% jupyter={"source_hidden": true}
-print("Acc", np.mean(dtree_scores['test_accuracy']))
+print("Acc (median)", np.nanmedian(dtree_scores['test_accuracy'])) # median over the CV folds; NaN scores are skipped
+print("Acc (mean)", np.mean(dtree_scores['test_accuracy']))
 print("Precision", np.mean(dtree_scores['test_precision']))
 print("Recall", np.mean(dtree_scores['test_recall']))
 print("F1", np.mean(dtree_scores['test_f1']))
@@ -295,7 +318,8 @@ rfc_scores = cross_validate(
     scoring=('accuracy', 'precision', 'recall', 'f1')
 )
 # %% jupyter={"source_hidden": true}
-print("Acc", np.mean(rfc_scores['test_accuracy']))
+print("Acc (median)", np.nanmedian(rfc_scores['test_accuracy'])) # median over the CV folds; NaN scores are skipped
+print("Acc (mean)", np.mean(rfc_scores['test_accuracy']))
 print("Precision", np.mean(rfc_scores['test_precision']))
 print("Recall", np.mean(rfc_scores['test_recall']))
 print("F1", np.mean(rfc_scores['test_f1']))
@@ -320,7 +344,8 @@ gbc_scores = cross_validate(
     scoring=('accuracy', 'precision', 'recall', 'f1')
 )
 # %% jupyter={"source_hidden": true}
-print("Acc", np.mean(gbc_scores['test_accuracy']))
+print("Acc (median)", np.nanmedian(gbc_scores['test_accuracy'])) # median over the CV folds; NaN scores are skipped
+print("Acc (mean)", np.mean(gbc_scores['test_accuracy']))
 print("Precision", np.mean(gbc_scores['test_precision']))
 print("Recall", np.mean(gbc_scores['test_recall']))
 print("F1", np.mean(gbc_scores['test_f1']))
@@ -345,7 +370,8 @@ lgbm_scores = cross_validate(
     scoring=('accuracy', 'precision', 'recall', 'f1')
 )
 # %% jupyter={"source_hidden": true}
-print("Acc", np.mean(lgbm_scores['test_accuracy']))
+print("Acc (median)", np.nanmedian(lgbm_scores['test_accuracy'])) # median over the CV folds; NaN scores are skipped
+print("Acc (mean)", np.mean(lgbm_scores['test_accuracy']))
 print("Precision", np.mean(lgbm_scores['test_precision']))
 print("Recall", np.mean(lgbm_scores['test_recall']))
 print("F1", np.mean(lgbm_scores['test_f1']))
@@ -370,7 +396,8 @@ xgb_classifier_scores = cross_validate(
     scoring=('accuracy', 'precision', 'recall', 'f1')
 )
 # %% jupyter={"source_hidden": true}
-print("Acc", np.mean(xgb_classifier_scores['test_accuracy']))
+print("Acc (median)", np.nanmedian(xgb_classifier_scores['test_accuracy'])) # median over the CV folds; NaN scores are skipped
+print("Acc (mean)", np.mean(xgb_classifier_scores['test_accuracy']))
 print("Precision", np.mean(xgb_classifier_scores['test_precision']))
 print("Recall", np.mean(xgb_classifier_scores['test_recall']))
 print("F1", np.mean(xgb_classifier_scores['test_f1']))