Drop all window count related features in cleaning script.

sociality-task
Primoz 2023-02-15 14:15:56 +00:00
parent 8ae5ad0e88
commit 36651a11c8
1 changed file with 8 additions and 1 deletion

View File

@@ -87,6 +87,7 @@ def straw_cleaning(sensor_data_files, provider, target):
if features.empty: if features.empty:
return pd.DataFrame(columns=excluded_columns) return pd.DataFrame(columns=excluded_columns)
# (3) CONTEXTUAL IMPUTATION # (3) CONTEXTUAL IMPUTATION
# Impute selected phone features with a high number # Impute selected phone features with a high number
@@ -232,10 +233,16 @@ def straw_cleaning(sensor_data_files, provider, target):
if cat2: # Transform columns to category dtype (homelabel) if cat2: # Transform columns to category dtype (homelabel)
features[cat2] = features[cat2].astype(int).astype('category') features[cat2] = features[cat2].astype(int).astype('category')
# (10) VERIFY IF THERE ARE ANY NANS LEFT IN THE DATAFRAME # (10) DROP ALL WINDOW RELATED COLUMNS
win_count_cols = [col for col in features if "SO_windowsCount" in col]
if win_count_cols:
features.drop(columns=win_count_cols, inplace=True)
# (11) VERIFY IF THERE ARE ANY NANS LEFT IN THE DATAFRAME
if features.isna().any().any(): if features.isna().any().any():
raise ValueError("There are still some NaNs present in the dataframe. Please check for implementation errors.") raise ValueError("There are still some NaNs present in the dataframe. Please check for implementation errors.")
return features return features
def impute(df, method='zero'): def impute(df, method='zero'):