Add PCA for composite target.
parent
78807b941c
commit
9cc6bf7c21
|
@ -17,6 +17,8 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import seaborn as sns
|
||||||
|
from sklearn.decomposition import PCA
|
||||||
|
|
||||||
from machine_learning.helper import (
|
from machine_learning.helper import (
|
||||||
impute_encode_categorical_features,
|
impute_encode_categorical_features,
|
||||||
|
@ -80,9 +82,25 @@ for target in TARGETS:
|
||||||
)
|
)
|
||||||
print(all_features_cleaned.shape)
|
print(all_features_cleaned.shape)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
pca = PCA(n_components=1)
|
||||||
|
TARGETS_PREFIXED = ["phone_esm_straw_" + target for target in TARGETS]
|
||||||
|
pca.fit(all_features_cleaned[TARGETS_PREFIXED])
|
||||||
|
print(pca.explained_variance_ratio_)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
model_input = all_features_cleaned.drop(columns=TARGETS_PREFIXED)
|
||||||
|
model_input["target"] = pca.fit_transform(all_features_cleaned[TARGETS_PREFIXED])
|
||||||
|
|
||||||
|
# %%
|
||||||
|
sns.histplot(data=model_input, x="target")
|
||||||
|
|
||||||
|
# %%
|
||||||
|
model_input.target.quantile(0.6)
|
||||||
|
|
||||||
# %% jupyter={"outputs_hidden": false, "source_hidden": false}
|
# %% jupyter={"outputs_hidden": false, "source_hidden": false}
|
||||||
# bins = [-10, 0, 10] # bins for z-scored targets
|
# bins = [-10, 0, 10] # bins for z-scored targets
|
||||||
BINS = [-1, 0, 4] # bins for stressfulness (0-4) target
|
BINS = [-10, 0, 10] # bins for the PCA composite target (component scores are centered on 0)
|
||||||
print("BINS: ", BINS)
|
print("BINS: ", BINS)
|
||||||
model_input["target"], edges = pd.cut(
|
model_input["target"], edges = pd.cut(
|
||||||
model_input.target, bins=BINS, labels=["low", "high"], retbins=True, right=True
|
model_input.target, bins=BINS, labels=["low", "high"], retbins=True, right=True
|
||||||
|
|
Loading…
Reference in New Issue