Add additional categorical features (uncomment).

ml_pipeline
Primoz 2022-11-28 13:42:46 +01:00
parent 9a218c8e2a
commit 7504aa34cf
3 changed files with 4 additions and 4 deletions

View File

@@ -80,7 +80,7 @@ else:
# %% jupyter={"source_hidden": true} # %% jupyter={"source_hidden": true}
categorical_feature_colnames = ["gender", "startlanguage"] categorical_feature_colnames = ["gender", "startlanguage"]
additional_categorical_features = [] #[col for col in data_x.columns if "mostcommonactivity" in col or "homelabel" in col] additional_categorical_features = [col for col in data_x.columns if "mostcommonactivity" in col or "homelabel" in col]
categorical_feature_colnames += additional_categorical_features categorical_feature_colnames += additional_categorical_features
categorical_features = data_x[categorical_feature_colnames].copy() categorical_features = data_x[categorical_feature_colnames].copy()

View File

@@ -122,7 +122,7 @@ for k in range(n_clusters):
# Treat categorical features # Treat categorical features
categorical_feature_colnames = ["gender", "startlanguage"] categorical_feature_colnames = ["gender", "startlanguage"]
additional_categorical_features = [] #[col for col in data_x.columns if "mostcommonactivity" in col or "homelabel" in col] additional_categorical_features = [col for col in data_x.columns if "mostcommonactivity" in col or "homelabel" in col]
categorical_feature_colnames += additional_categorical_features categorical_feature_colnames += additional_categorical_features
categorical_features = data_x[categorical_feature_colnames].copy() categorical_features = data_x[categorical_feature_colnames].copy()

View File

@@ -75,8 +75,8 @@ def treat_categorical_features(input_set):
# %% [markdown] # %% [markdown]
# ## Set script's parameters # ## Set script's parameters
n_clusters = 4 # Number of clusters (could be regarded as a hyperparameter) n_clusters = 3 # Number of clusters (could be regarded as a hyperparameter)
n_sl = 1 # Number of largest/smallest accuracies (of particular CV) outputs n_sl = 3 # Number of largest/smallest accuracies (of particular CV) outputs
# %% jupyter={"source_hidden": true} # %% jupyter={"source_hidden": true}
model_input = pd.read_csv("../data/intradaily_30_min_all_targets/input_JCQ_job_demand_mean.csv") model_input = pd.read_csv("../data/intradaily_30_min_all_targets/input_JCQ_job_demand_mean.csv")