Drop all window-count-related features in the cleaning script.
parent
8ae5ad0e88
commit
36651a11c8
|
@ -87,6 +87,7 @@ def straw_cleaning(sensor_data_files, provider, target):
|
||||||
if features.empty:
|
if features.empty:
|
||||||
return pd.DataFrame(columns=excluded_columns)
|
return pd.DataFrame(columns=excluded_columns)
|
||||||
|
|
||||||
|
|
||||||
# (3) CONTEXTUAL IMPUTATION
|
# (3) CONTEXTUAL IMPUTATION
|
||||||
|
|
||||||
# Impute selected phone features with a high number
|
# Impute selected phone features with a high number
|
||||||
|
@ -232,10 +233,16 @@ def straw_cleaning(sensor_data_files, provider, target):
|
||||||
if cat2: # Transform columns to category dtype (homelabel)
|
if cat2: # Transform columns to category dtype (homelabel)
|
||||||
features[cat2] = features[cat2].astype(int).astype('category')
|
features[cat2] = features[cat2].astype(int).astype('category')
|
||||||
|
|
||||||
# (10) VERIFY IF THERE ARE ANY NANS LEFT IN THE DATAFRAME
|
# (10) DROP ALL WINDOW RELATED COLUMNS
|
||||||
|
win_count_cols = [col for col in features if "SO_windowsCount" in col]
|
||||||
|
if win_count_cols:
|
||||||
|
features.drop(columns=win_count_cols, inplace=True)
|
||||||
|
|
||||||
|
# (11) VERIFY IF THERE ARE ANY NANS LEFT IN THE DATAFRAME
|
||||||
if features.isna().any().any():
|
if features.isna().any().any():
|
||||||
raise ValueError("There are still some NaNs present in the dataframe. Please check for implementation errors.")
|
raise ValueError("There are still some NaNs present in the dataframe. Please check for implementation errors.")
|
||||||
|
|
||||||
|
|
||||||
return features
|
return features
|
||||||
|
|
||||||
def impute(df, method='zero'):
|
def impute(df, method='zero'):
|
||||||
|
|
Loading…
Reference in New Issue