Separate target standardization from the rest of the features.

sociality-task
Primoz 2022-12-13 15:31:39 +00:00
parent e40f0fd8dc
commit 3ce7f2c2a5
2 changed files with 8 additions and 3 deletions

View File

@ -710,7 +710,8 @@ ALL_CLEANING_OVERALL:
COMPUTE: True
MIN_OVERLAP_FOR_CORR_THRESHOLD: 0.5
CORR_THRESHOLD: 0.95
STANDARDIZATION: False
STANDARDIZATION: True
TARGET_STANDARDIZATION: False
SRC_SCRIPT: src/features/all_cleaning_overall/straw/main.py

View File

@ -169,8 +169,12 @@ def straw_cleaning(sensor_data_files, provider, target):
# Expected warning within this code block
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=RuntimeWarning)
features.loc[:, ~features.columns.isin(excluded_columns + ["pid"] + nominal_cols)] = \
features.loc[:, ~features.columns.isin(excluded_columns + nominal_cols)].groupby('pid').transform(lambda x: StandardScaler().fit_transform(x.values[:,np.newaxis]).ravel())
if provider["TARGET_STANDARDIZATION"]:
features.loc[:, ~features.columns.isin(excluded_columns + ["pid"] + nominal_cols)] = \
features.loc[:, ~features.columns.isin(excluded_columns + nominal_cols)].groupby('pid').transform(lambda x: StandardScaler().fit_transform(x.values[:,np.newaxis]).ravel())
else:
features.loc[:, ~features.columns.isin(excluded_columns + ["pid"] + nominal_cols + ['phone_esm_straw_' + target])] = \
features.loc[:, ~features.columns.isin(excluded_columns + nominal_cols + ['phone_esm_straw_' + target])].groupby('pid').transform(lambda x: StandardScaler().fit_transform(x.values[:,np.newaxis]).ravel())
graph_bf_af(features, "8standardization")