Override stressfulness event target with extracted values from csv.

imputation_and_cleaning
Primoz 2022-11-10 10:29:11 +00:00
parent a668b6e8da
commit e4985c9121
1 changed files with 13 additions and 14 deletions

View File

@ -14,21 +14,26 @@ def straw_cleaning(sensor_data_files, provider, target):
features = pd.read_csv(sensor_data_files["sensor_data"][0])
# features = features[features['local_segment_label'] == 'working_day'] # Filtriranje ustreznih časovnih segmentov
# print(features)
# sys.exit()
esm_cols = features.loc[:, features.columns.str.startswith('phone_esm_straw')] # Get target (esm) columns
with open('config.yaml', 'r') as stream:
config = yaml.load(stream, Loader=yaml.FullLoader)
esm_cols = features.loc[:, features.columns.str.startswith('phone_esm_straw')] # Get target (esm) columns
excluded_columns = ['local_segment', 'local_segment_label', 'local_segment_start_datetime', 'local_segment_end_datetime']
graph_bf_af(features, "1target_rows_before")
# (1.0) OVERRIDE STRESSFULNESS EVENT TARGETS IF ERS TARGETS_METHOD IS "STRESS_EVENT"
if config["TIME_SEGMENTS"]["TAILORED_EVENTS"]["TARGETS_METHOD"] == "stress_event" and \
"appraisal_stressfulness_event_mean" in config['PARAMS_FOR_ANALYSIS']['TARGET']['ALL_LABELS']:
# (1) FILTER_OUT THE ROWS THAT DO NOT HAVE THE TARGET COLUMN AVAILABLE
stress_events_targets = pd.read_csv("data/external/stress_event_targets.csv")
features.drop(columns=['phone_esm_straw_appraisal_stressfulness_event_mean'], inplace=True)
features = features.merge(stress_events_targets.rename(columns={'label': 'local_segment_label'}), on=['local_segment_label'], how='inner') \
.rename(columns={'intensity': 'phone_esm_straw_appraisal_stressfulness_event_mean'})
esm_cols = features.loc[:, features.columns.str.startswith('phone_esm_straw')] # Get target (esm) columns
# (1.1) FILTER_OUT THE ROWS THAT DO NOT HAVE THE TARGET COLUMN AVAILABLE
if config['PARAMS_FOR_ANALYSIS']['TARGET']['COMPUTE']:
features = features[features['phone_esm_straw_' + target].notna()].reset_index(drop=True)
@ -51,18 +56,12 @@ def straw_cleaning(sensor_data_files, provider, target):
# Drop rows where phone data yield is less then given threshold
if provider["PHONE_DATA_YIELD_RATIO_THRESHOLD"]:
# print("\nThreshold:", provider["PHONE_DATA_YIELD_RATIO_THRESHOLD"])
# print("Phone features data yield stats:", features[phone_data_yield_column].describe(), "\n")
# print(features[phone_data_yield_column].sort_values())
hist = features[phone_data_yield_column].hist(bins=5)
plt.close()
features = features[features[phone_data_yield_column] >= provider["PHONE_DATA_YIELD_RATIO_THRESHOLD"]].reset_index(drop=True)
# Drop rows where empatica data yield is less then given threshold
if provider["EMPATICA_DATA_YIELD_RATIO_THRESHOLD"]:
# print("\nThreshold:", provider["EMPATICA_DATA_YIELD_RATIO_THRESHOLD"])
# print("E4 features data yield stats:", features["empatica_data_yield"].describe(), "\n")
# print(features["empatica_data_yield"].sort_values())
features = features[features["empatica_data_yield"] >= provider["EMPATICA_DATA_YIELD_RATIO_THRESHOLD"]].reset_index(drop=True)
if features.empty: