Test nonstandardized data with regular classification pipeline.

ml_pipeline
Primoz 2022-11-29 14:06:06 +01:00
parent 7504aa34cf
commit cf0e4f89be
1 changed file with 6 additions and 4 deletions

View File

@@ -51,17 +51,19 @@ cv_method_str = 'logo' # logo, halflogo, 5kfold # Cross-validation method (could
n_sl = 1 # Number of largest/smallest accuracies (of particular CV) outputs
# %% jupyter={"source_hidden": true}
model_input = pd.read_csv("../data/intradaily_30_min_all_targets/input_JCQ_job_demand_mean.csv")
model_input = pd.read_csv("../data/stressfulness_event_nonstandardized/input_appraisal_stressfulness_event_mean.csv")
# %% jupyter={"source_hidden": true}
index_columns = ["local_segment", "local_segment_label", "local_segment_start_datetime", "local_segment_end_datetime"]
model_input.set_index(index_columns, inplace=True)
model_input['target'].value_counts()
# %% jupyter={"source_hidden": true}
bins = [-10, -1, 1, 10] # bins for z-scored targets
model_input['target'], edges = pd.cut(model_input.target, bins=bins, labels=['low', 'medium', 'high'], retbins=True, right=True) #['low', 'medium', 'high']
# bins = [-10, -1, 1, 10] # bins for z-scored targets
bins = [0, 1, 4] # bins for stressfulness (1-4) target
model_input['target'], edges = pd.cut(model_input.target, bins=bins, labels=['low', 'high'], retbins=True, right=True) #['low', 'medium', 'high']
model_input['target'].value_counts(), edges
model_input = model_input[model_input['target'] != "medium"]
# model_input = model_input[model_input['target'] != "medium"]
model_input['target'] = model_input['target'].astype(str).apply(lambda x: 0 if x == "low" else 1)
model_input['target'].value_counts()