Merge branch 'imputation_and_cleaning' of https://repo.ijs.si/junoslukan/rapids into imputation_and_cleaning

imputation_and_cleaning
Primoz 2022-10-26 14:18:20 +00:00
commit 5d17c92e54
1 changed file with 2 additions and 1 deletion

View File

@@ -36,7 +36,6 @@ def straw_cleaning(sensor_data_files, provider, target):
return pd.DataFrame(columns=excluded_columns) return pd.DataFrame(columns=excluded_columns)
graph_bf_af(features, "2target_rows_after") graph_bf_af(features, "2target_rows_after")
print("HERE1", target, features["pid"])
# (2) QUALITY CHECK (DATA YIELD COLUMN) drops the rows where E4 or phone data is low quality # (2) QUALITY CHECK (DATA YIELD COLUMN) drops the rows where E4 or phone data is low quality
phone_data_yield_unit = provider["PHONE_DATA_YIELD_FEATURE"].split("_")[3].lower() phone_data_yield_unit = provider["PHONE_DATA_YIELD_FEATURE"].split("_")[3].lower()
@@ -66,6 +65,8 @@ def straw_cleaning(sensor_data_files, provider, target):
# print(features["empatica_data_yield"].sort_values()) # print(features["empatica_data_yield"].sort_values())
features = features[features["empatica_data_yield"] >= provider["EMPATICA_DATA_YIELD_RATIO_THRESHOLD"]].reset_index(drop=True) features = features[features["empatica_data_yield"] >= provider["EMPATICA_DATA_YIELD_RATIO_THRESHOLD"]].reset_index(drop=True)
if features.empty:
return pd.DataFrame(columns=excluded_columns)
graph_bf_af(features, "3data_yield_drop_rows") graph_bf_af(features, "3data_yield_drop_rows")