Add a safety net for when the features dataframe is empty.

imputation_and_cleaning
Primoz 2022-10-25 15:26:43 +00:00
parent bfd637eb9c
commit 8acac50125
1 changed file with 3 additions and 2 deletions

View File

@@ -14,7 +14,7 @@ def straw_cleaning(sensor_data_files, provider, target):
features = pd.read_csv(sensor_data_files["sensor_data"][0]) features = pd.read_csv(sensor_data_files["sensor_data"][0])
features = features[features['local_segment_label'] == 'working_day'] # Filtriranje ustreznih časovnih segmentov # features = features[features['local_segment_label'] == 'working_day'] # Filtriranje ustreznih časovnih segmentov
# print(features) # print(features)
# sys.exit() # sys.exit()
@@ -36,7 +36,6 @@ def straw_cleaning(sensor_data_files, provider, target):
return pd.DataFrame(columns=excluded_columns) return pd.DataFrame(columns=excluded_columns)
graph_bf_af(features, "2target_rows_after") graph_bf_af(features, "2target_rows_after")
print("HERE1", target, features["pid"])
# (2) QUALITY CHECK (DATA YIELD COLUMN) drops the rows where E4 or phone data is low quality # (2) QUALITY CHECK (DATA YIELD COLUMN) drops the rows where E4 or phone data is low quality
phone_data_yield_unit = provider["PHONE_DATA_YIELD_FEATURE"].split("_")[3].lower() phone_data_yield_unit = provider["PHONE_DATA_YIELD_FEATURE"].split("_")[3].lower()
@@ -66,6 +65,8 @@ def straw_cleaning(sensor_data_files, provider, target):
# print(features["empatica_data_yield"].sort_values()) # print(features["empatica_data_yield"].sort_values())
features = features[features["empatica_data_yield"] >= provider["EMPATICA_DATA_YIELD_RATIO_THRESHOLD"]].reset_index(drop=True) features = features[features["empatica_data_yield"] >= provider["EMPATICA_DATA_YIELD_RATIO_THRESHOLD"]].reset_index(drop=True)
if features.empty:
return pd.DataFrame(columns=excluded_columns)
graph_bf_af(features, "3data_yield_drop_rows") graph_bf_af(features, "3data_yield_drop_rows")