Drop all window-count-related features in the cleaning script.
parent
8ae5ad0e88
commit
36651a11c8
|
@ -87,6 +87,7 @@ def straw_cleaning(sensor_data_files, provider, target):
|
|||
if features.empty:
|
||||
return pd.DataFrame(columns=excluded_columns)
|
||||
|
||||
|
||||
# (3) CONTEXTUAL IMPUTATION
|
||||
|
||||
# Impute selected phone features with a high number
|
||||
|
@ -232,10 +233,16 @@ def straw_cleaning(sensor_data_files, provider, target):
|
|||
if cat2: # Transform columns to category dtype (homelabel)
|
||||
features[cat2] = features[cat2].astype(int).astype('category')
|
||||
|
||||
# (10) VERIFY IF THERE ARE ANY NANS LEFT IN THE DATAFRAME
|
||||
# (10) DROP ALL WINDOW COUNT RELATED COLUMNS
|
||||
win_count_cols = [col for col in features if "SO_windowsCount" in col]
|
||||
if win_count_cols:
|
||||
features.drop(columns=win_count_cols, inplace=True)
|
||||
|
||||
# (11) VERIFY IF THERE ARE ANY NANS LEFT IN THE DATAFRAME
|
||||
if features.isna().any().any():
|
||||
raise ValueError("There are still some NaNs present in the dataframe. Please check for implementation errors.")
|
||||
|
||||
|
||||
return features
|
||||
|
||||
def impute(df, method='zero'):
|
||||
|
|
Loading…
Reference in New Issue