Set path programmatically.

2023-05-18 16:36:46 +02:00 · 2023-05-18 16:36:46 +02:00 · cad28c3fe8
parent 38a405d378
commit cad28c3fe8
1 changed files with 33 additions and 18 deletions
--- a/exploration/ml_pipeline_classification.py
+++ b/exploration/ml_pipeline_classification.py
@@ -14,12 +14,13 @@
 # ---

 # %% jupyter={"outputs_hidden": false, "source_hidden": false}
-# %matplotlib inline
-import os
-import sys
+# from IPython.core.interactiveshell import InteractiveShell
+from pathlib import Path

+# matplotlib inline
+# import os
+# import sys
 import pandas as pd
-from IPython.core.interactiveshell import InteractiveShell

 from machine_learning.helper import (
    impute_encode_categorical_features,
@@ -28,30 +29,44 @@ from machine_learning.helper import (
    run_all_classification_models,
 )

-InteractiveShell.ast_node_interactivity = "all"
-
-nb_dir = os.path.split(os.getcwd())[0]
-if nb_dir not in sys.path:
-    sys.path.append(nb_dir)
+# InteractiveShell.ast_node_interactivity = "all"
+#
+# nb_dir = os.path.split(os.getcwd())[0]
+# if nb_dir not in sys.path:
+#     sys.path.append(nb_dir)


 # %%
 CV_METHOD = "logo"  # logo, half_logo, 5kfold
 # Cross-validation method (could be regarded as a hyperparameter)
+print("CV_METHOD: " + CV_METHOD)
 N_SL = 3  # Number of largest/smallest accuracies (of particular CV) outputs
 UNDERSAMPLING = False
 # (bool) If True this will train and test data on balanced dataset
 # (using undersampling method)

 # %% jupyter={"outputs_hidden": false, "source_hidden": false}
-model_input = pd.read_csv(
-    "E:/STRAWresults/20230415/stress_event/input_appraisal_stressfulness_event_mean.csv"
+PATH_BASE = Path("E:/STRAWresults/20230415")
+
+SEGMENT_TYPE = "period"
+print("SEGMENT_TYPE: " + SEGMENT_TYPE)
+SEGMENT_LENGTH = "30_minutes_before"
+print("SEGMENT_LENGTH: " + SEGMENT_LENGTH)
+TARGET_VARIABLE = "appraisal_stressfulness"
+print("TARGET_VARIABLE: " + TARGET_VARIABLE)
+
+PATH_FULL = (
+    PATH_BASE
+    / SEGMENT_LENGTH
+    / ("input_" + TARGET_VARIABLE + "_" + SEGMENT_TYPE + "_mean.csv")
 )
-# model_input =
-# model_input[model_input.columns.drop(
-# list(model_input.filter(regex='empatica_temperature'))
-# )]
-# model_input = model_input[model_input['local_segment'].str.contains("daily")]
+
+model_input = pd.read_csv(PATH_FULL)
+
+if SEGMENT_LENGTH == "daily":
+    DAY_LENGTH = "daily"  # or "working"
+    print(DAY_LENGTH)
+    model_input = model_input[model_input["local_segment"].str.contains(DAY_LENGTH)]

 # %% jupyter={"outputs_hidden": false, "source_hidden": false}
 model_input["target"].value_counts()
@@ -63,7 +78,7 @@ model_input["target"], edges = pd.cut(
    model_input.target, bins=bins, labels=["low", "high"], retbins=True, right=True
 )  # ['low', 'medium', 'high']
 model_input["target"].value_counts(), edges
-# model_input = model_input[model_input['target'] != "medium"]
+model_input = model_input[model_input["target"] != "medium"]
 model_input["target"] = (
    model_input["target"].astype(str).apply(lambda x: 0 if x == "low" else 1)
 )
@@ -99,7 +114,7 @@ data_y.shape
 scores = run_all_classification_models(data_x, data_y, data_groups, cross_validator)
 # %%
 scores.to_csv(
-    "../presentation/appraisal_stressfulness_event_classification_"
+    "../presentation/results/appraisal_stressfulness_awake_classification_"
    + CV_METHOD
    + ".csv",
    index=False,