Test nonstandardized data with regular classification pipeline.
parent
7504aa34cf
commit
cf0e4f89be
|
@ -51,17 +51,19 @@ cv_method_str = 'logo' # logo, halflogo, 5kfold # Cross-validation method (could
|
|||
n_sl = 1 # Number of largest/smallest accuracies (of a particular CV) to output
|
||||
|
||||
# %% jupyter={"source_hidden": true}
|
||||
model_input = pd.read_csv("../data/intradaily_30_min_all_targets/input_JCQ_job_demand_mean.csv")
|
||||
model_input = pd.read_csv("../data/stressfulness_event_nonstandardized/input_appraisal_stressfulness_event_mean.csv")
|
||||
|
||||
# %% jupyter={"source_hidden": true}
|
||||
index_columns = ["local_segment", "local_segment_label", "local_segment_start_datetime", "local_segment_end_datetime"]
|
||||
model_input.set_index(index_columns, inplace=True)
|
||||
model_input['target'].value_counts()
|
||||
|
||||
# %% jupyter={"source_hidden": true}
|
||||
bins = [-10, -1, 1, 10] # bins for z-scored targets
|
||||
model_input['target'], edges = pd.cut(model_input.target, bins=bins, labels=['low', 'medium', 'high'], retbins=True, right=True) #['low', 'medium', 'high']
|
||||
# bins = [-10, -1, 1, 10] # bins for z-scored targets
|
||||
bins = [0, 1, 4] # bins for stressfulness (1-4) target
|
||||
model_input['target'], edges = pd.cut(model_input.target, bins=bins, labels=['low', 'high'], retbins=True, right=True) # two classes here; the z-scored binning used ['low', 'medium', 'high']
|
||||
model_input['target'].value_counts(), edges
|
||||
model_input = model_input[model_input['target'] != "medium"]
|
||||
# model_input = model_input[model_input['target'] != "medium"]
|
||||
model_input['target'] = model_input['target'].astype(str).apply(lambda x: 0 if x == "low" else 1)
|
||||
|
||||
model_input['target'].value_counts()
|
||||
|
|
Loading…
Reference in New Issue