Add PCA for composite target.
parent
78807b941c
commit
9cc6bf7c21
|
@ -17,6 +17,8 @@
|
|||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
import seaborn as sns
|
||||
from sklearn.decomposition import PCA
|
||||
|
||||
from machine_learning.helper import (
|
||||
impute_encode_categorical_features,
|
||||
|
@ -80,9 +82,25 @@ for target in TARGETS:
|
|||
)
|
||||
print(all_features_cleaned.shape)
|
||||
|
||||
# %%
|
||||
# Fit a one-component PCA on the prefixed target columns and report how much
# of their variance that single component explains.
TARGETS_PREFIXED = ["phone_esm_straw_" + name for name in TARGETS]
pca = PCA(n_components=1).fit(all_features_cleaned[TARGETS_PREFIXED])
print(pca.explained_variance_ratio_)
|
||||
|
||||
# %%
|
||||
# Build the model input: every column except the individual targets, plus a
# single composite "target" column holding the first principal component.
model_input = all_features_cleaned.drop(columns=TARGETS_PREFIXED)
# Reuse the PCA already fitted above instead of refitting, so the composite
# target comes from the same model whose explained variance was printed.
model_input["target"] = pca.transform(all_features_cleaned[TARGETS_PREFIXED])
|
||||
|
||||
# %%
|
||||
# Histogram of the composite target values.
sns.histplot(x="target", data=model_input)
|
||||
|
||||
# %%
|
||||
# 60th percentile of the composite target.
model_input["target"].quantile(0.6)
|
||||
|
||||
# %% jupyter={"outputs_hidden": false, "source_hidden": false}
|
||||
# Bin edges used by pd.cut below to split the composite (PCA) target at 0
# into "low" and "high". With right=True the intervals are (-10, 0] and
# (0, 10]; values outside them become NaN.
# NOTE(review): assumes the PCA scores stay within (-10, 10] - confirm.
# Previously used edges: [-10, 0, 10] for z-scored targets and [-1, 0, 4]
# for the raw stressfulness (0-4) target.
BINS = [-10, 0, 10]
print("BINS: ", BINS)
|
||||
model_input["target"], edges = pd.cut(
|
||||
model_input.target, bins=BINS, labels=["low", "high"], retbins=True, right=True
|
||||
|
|
Loading…
Reference in New Issue