Drop all window count related features in cleaning script.

sociality-task
Primoz 2023-02-15 14:15:56 +00:00
parent 8ae5ad0e88
commit 36651a11c8
1 changed files with 8 additions and 1 deletions

View File

@@ -87,6 +87,7 @@ def straw_cleaning(sensor_data_files, provider, target):
if features.empty:
return pd.DataFrame(columns=excluded_columns)
# (3) CONTEXTUAL IMPUTATION
# Impute selected phone features with a high number
@@ -232,10 +233,16 @@ def straw_cleaning(sensor_data_files, provider, target):
if cat2: # Transform columns to category dtype (homelabel)
features[cat2] = features[cat2].astype(int).astype('category')
# (10) VERIFY IF THERE ARE ANY NANS LEFT IN THE DATAFRAME
# (10) DROP ALL WINDOW COUNT RELATED COLUMNS
win_count_cols = [col for col in features if "SO_windowsCount" in col]
if win_count_cols:
features.drop(columns=win_count_cols, inplace=True)
# (11) VERIFY IF THERE ARE ANY NANS LEFT IN THE DATAFRAME
if features.isna().any().any():
raise ValueError("There are still some NaNs present in the dataframe. Please check for implementation errors.")
return features
def impute(df, method='zero'):