From e4985c91214ac6ecd43501d52b67816d26d4fdef Mon Sep 17 00:00:00 2001 From: Primoz Date: Thu, 10 Nov 2022 10:29:11 +0000 Subject: [PATCH] Override stressfulness event target with extracted values from csv. --- .../all_cleaning_overall/straw/main.py | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/features/all_cleaning_overall/straw/main.py b/src/features/all_cleaning_overall/straw/main.py index 5fba061e..d0b4639c 100644 --- a/src/features/all_cleaning_overall/straw/main.py +++ b/src/features/all_cleaning_overall/straw/main.py @@ -14,21 +14,26 @@ def straw_cleaning(sensor_data_files, provider, target): features = pd.read_csv(sensor_data_files["sensor_data"][0]) - # features = features[features['local_segment_label'] == 'working_day'] # Filtriranje ustreznih Ĩasovnih segmentov - - # print(features) - # sys.exit() - - esm_cols = features.loc[:, features.columns.str.startswith('phone_esm_straw')] # Get target (esm) columns - with open('config.yaml', 'r') as stream: config = yaml.load(stream, Loader=yaml.FullLoader) + esm_cols = features.loc[:, features.columns.str.startswith('phone_esm_straw')] # Get target (esm) columns + excluded_columns = ['local_segment', 'local_segment_label', 'local_segment_start_datetime', 'local_segment_end_datetime'] graph_bf_af(features, "1target_rows_before") + # (1.0) OVERRIDE STRESSFULNESS EVENT TARGETS IF ERS TARGETS_METHOD IS "STRESS_EVENT" + if config["TIME_SEGMENTS"]["TAILORED_EVENTS"]["TARGETS_METHOD"] == "stress_event" and \ + "appraisal_stressfulness_event_mean" in config['PARAMS_FOR_ANALYSIS']['TARGET']['ALL_LABELS']: - # (1) FILTER_OUT THE ROWS THAT DO NOT HAVE THE TARGET COLUMN AVAILABLE + stress_events_targets = pd.read_csv("data/external/stress_event_targets.csv") + features.drop(columns=['phone_esm_straw_appraisal_stressfulness_event_mean'], inplace=True) + features = features.merge(stress_events_targets.rename(columns={'label': 'local_segment_label'}), on=['local_segment_label'], how='inner') \ + .rename(columns={'intensity': 'phone_esm_straw_appraisal_stressfulness_event_mean'}) + + esm_cols = features.loc[:, features.columns.str.startswith('phone_esm_straw')] # Get target (esm) columns + + # (1.1) FILTER_OUT THE ROWS THAT DO NOT HAVE THE TARGET COLUMN AVAILABLE if config['PARAMS_FOR_ANALYSIS']['TARGET']['COMPUTE']: features = features[features['phone_esm_straw_' + target].notna()].reset_index(drop=True) @@ -51,18 +56,12 @@ def straw_cleaning(sensor_data_files, provider, target): # Drop rows where phone data yield is less then given threshold if provider["PHONE_DATA_YIELD_RATIO_THRESHOLD"]: - # print("\nThreshold:", provider["PHONE_DATA_YIELD_RATIO_THRESHOLD"]) - # print("Phone features data yield stats:", features[phone_data_yield_column].describe(), "\n") - # print(features[phone_data_yield_column].sort_values()) hist = features[phone_data_yield_column].hist(bins=5) plt.close() features = features[features[phone_data_yield_column] >= provider["PHONE_DATA_YIELD_RATIO_THRESHOLD"]].reset_index(drop=True) # Drop rows where empatica data yield is less then given threshold if provider["EMPATICA_DATA_YIELD_RATIO_THRESHOLD"]: - # print("\nThreshold:", provider["EMPATICA_DATA_YIELD_RATIO_THRESHOLD"]) - # print("E4 features data yield stats:", features["empatica_data_yield"].describe(), "\n") - # print(features["empatica_data_yield"].sort_values()) features = features[features["empatica_data_yield"] >= provider["EMPATICA_DATA_YIELD_RATIO_THRESHOLD"]].reset_index(drop=True) if features.empty: