diff --git a/exploration/ml_pipeline_classification.py b/exploration/ml_pipeline_classification.py
index ad460d3..233dffc 100644
--- a/exploration/ml_pipeline_classification.py
+++ b/exploration/ml_pipeline_classification.py
@@ -80,7 +80,7 @@ else:
 
 # %% jupyter={"source_hidden": true}
 categorical_feature_colnames = ["gender", "startlanguage"]
-additional_categorical_features = [] #[col for col in data_x.columns if "mostcommonactivity" in col or "homelabel" in col]
+additional_categorical_features = [col for col in data_x.columns if "mostcommonactivity" in col or "homelabel" in col]
 categorical_feature_colnames += additional_categorical_features
 
 categorical_features = data_x[categorical_feature_colnames].copy()
diff --git a/exploration/ml_pipeline_classification_with_clustering.py b/exploration/ml_pipeline_classification_with_clustering.py
index 8887b50..0bf4417 100644
--- a/exploration/ml_pipeline_classification_with_clustering.py
+++ b/exploration/ml_pipeline_classification_with_clustering.py
@@ -122,7 +122,7 @@ for k in range(n_clusters):
 
     # Treat categorical features
    categorical_feature_colnames = ["gender", "startlanguage"]
-    additional_categorical_features = [] #[col for col in data_x.columns if "mostcommonactivity" in col or "homelabel" in col]
+    additional_categorical_features = [col for col in data_x.columns if "mostcommonactivity" in col or "homelabel" in col]
     categorical_feature_colnames += additional_categorical_features
 
     categorical_features = data_x[categorical_feature_colnames].copy()
diff --git a/exploration/ml_pipeline_classification_with_clustering_2_class.py b/exploration/ml_pipeline_classification_with_clustering_2_class.py
index 026362f..36468fa 100644
--- a/exploration/ml_pipeline_classification_with_clustering_2_class.py
+++ b/exploration/ml_pipeline_classification_with_clustering_2_class.py
@@ -75,8 +75,8 @@ def treat_categorical_features(input_set):
 
 # %% [markdown]
 # ## Set script's parameters
-n_clusters = 4 # Number of clusters (could be regarded as a hyperparameter)
-n_sl = 1 # Number of largest/smallest accuracies (of particular CV) outputs
+n_clusters = 3 # Number of clusters (could be regarded as a hyperparameter)
+n_sl = 3 # Number of largest/smallest accuracies (of particular CV) outputs
 
 # %% jupyter={"source_hidden": true}
 model_input = pd.read_csv("../data/intradaily_30_min_all_targets/input_JCQ_job_demand_mean.csv")