From 36651a11c8e88f8ccbaafe49c5130add661e9e33 Mon Sep 17 00:00:00 2001
From: Primoz
Date: Wed, 15 Feb 2023 14:15:56 +0000
Subject: [PATCH] Drop all window count related features in cleaning script.

---
 src/features/all_cleaning_overall/straw/main.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/features/all_cleaning_overall/straw/main.py b/src/features/all_cleaning_overall/straw/main.py
index 197c285d..683cfa01 100644
--- a/src/features/all_cleaning_overall/straw/main.py
+++ b/src/features/all_cleaning_overall/straw/main.py
@@ -87,6 +87,7 @@ def straw_cleaning(sensor_data_files, provider, target):
     if features.empty:
         return pd.DataFrame(columns=excluded_columns)
 
+
     # (3) CONTEXTUAL IMPUTATION
 
     # Impute selected phone features with a high number
@@ -232,10 +233,16 @@ def straw_cleaning(sensor_data_files, provider, target):
     if cat2: # Transform columns to category dtype (homelabel)
         features[cat2] = features[cat2].astype(int).astype('category')
 
-    # (10) VERIFY IF THERE ARE ANY NANS LEFT IN THE DATAFRAME
+    # (10) DROP ALL WINDOW RELATED COLUMNS
+    win_count_cols = [col for col in features if "SO_windowsCount" in col]
+    if win_count_cols:
+        features.drop(columns=win_count_cols, inplace=True)
+
+    # (11) VERIFY IF THERE ARE ANY NANS LEFT IN THE DATAFRAME
     if features.isna().any().any():
         raise ValueError("There are still some NaNs present in the dataframe. Please check for implementation errors.")
 
+
     return features
 
 def impute(df, method='zero'):