Fix some bugs and extend the ERS and cleaning scripts with logic for multiple stress event targets.
parent ab803ee49c
commit 286de93bfd
@@ -22,24 +22,28 @@ def straw_cleaning(sensor_data_files, provider, target):
     excluded_columns = ['local_segment', 'local_segment_label', 'local_segment_start_datetime', 'local_segment_end_datetime']
 
     graph_bf_af(features, "1target_rows_before")
 
-    # (1.0) OVERRIDE STRESSFULNESS EVENT TARGETS IF ERS TARGETS_METHOD IS "STRESS_EVENT"
-    if config["TIME_SEGMENTS"]["TAILORED_EVENTS"]["TARGETS_METHOD"] == "stress_event":
+    # (1.0) OVERRIDE STRESSFULNESS EVENT TARGETS IF ERS SEGMENTING_METHOD IS "STRESS_EVENT"
+    if config["TIME_SEGMENTS"]["TAILORED_EVENTS"]["SEGMENTING_METHOD"] == "stress_event":
         stress_events_targets = pd.read_csv("data/external/stress_event_targets.csv")
 
         if "appraisal_stressfulness_event_mean" in config['PARAMS_FOR_ANALYSIS']['TARGET']['ALL_LABELS']:
             features.drop(columns=['phone_esm_straw_appraisal_stressfulness_event_mean'], inplace=True)
-            features = features.merge(stress_events_targets.rename(columns={'label': 'local_segment_label'}), on=['local_segment_label'], how='inner') \
+            features = features.merge(stress_events_targets[["label", "appraisal_stressfulness_event"]] \
+                .rename(columns={'label': 'local_segment_label'}), on=['local_segment_label'], how='inner') \
                 .rename(columns={'appraisal_stressfulness_event': 'phone_esm_straw_appraisal_stressfulness_event_mean'})
 
         if "appraisal_threat_mean" in config['PARAMS_FOR_ANALYSIS']['TARGET']['ALL_LABELS']:
             features.drop(columns=['phone_esm_straw_appraisal_threat_mean'], inplace=True)
-            features = features.merge(stress_events_targets.rename(columns={'label': 'local_segment_label'}), on=['local_segment_label'], how='inner') \
-                .rename(columns={'appraisal_threat_mean': 'phone_esm_straw_appraisal_threat_mean'})
+            features = features.merge(stress_events_targets[["label", "appraisal_threat"]] \
+                .rename(columns={'label': 'local_segment_label'}), on=['local_segment_label'], how='inner') \
+                .rename(columns={'appraisal_threat': 'phone_esm_straw_appraisal_threat_mean'})
 
         if "appraisal_challenge_mean" in config['PARAMS_FOR_ANALYSIS']['TARGET']['ALL_LABELS']:
             features.drop(columns=['phone_esm_straw_appraisal_challenge_mean'], inplace=True)
-            features = features.merge(stress_events_targets.rename(columns={'label': 'local_segment_label'}), on=['local_segment_label'], how='inner') \
+            features = features.merge(stress_events_targets[["label", "appraisal_challenge"]] \
+                .rename(columns={'label': 'local_segment_label'}), on=['local_segment_label'], how='inner') \
                 .rename(columns={'appraisal_challenge': 'phone_esm_straw_appraisal_challenge_mean'})
 
     esm_cols = features.loc[:, features.columns.str.startswith('phone_esm_straw')] # Get target (esm) columns
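For orientation, here is a minimal, self-contained sketch of the select, rename, merge, rename pattern the updated cleaning block applies per target. The frames, label values, and participant id below are invented for illustration and are not taken from the pipeline.

import pandas as pd

# Hypothetical stand-ins for the real `features` table and for
# data/external/stress_event_targets.csv written by the ERS script.
features = pd.DataFrame({
    'local_segment_label': ['straw_event_stress_event_p01_000', 'straw_event_stress_event_p01_001'],
    'phone_esm_straw_appraisal_stressfulness_event_mean': [2.0, 3.0],  # segment-level mean to be overridden
    'some_sensor_feature': [0.4, 0.7],
})
stress_events_targets = pd.DataFrame({
    'label': ['straw_event_stress_event_p01_000', 'straw_event_stress_event_p01_001'],
    'appraisal_stressfulness_event': [1.0, 4.0],
    'appraisal_threat': [0.5, 2.5],
    'appraisal_challenge': [1.5, 3.5],
})

# Drop the old column, keep only the key plus the one wanted target, align the key
# with 'local_segment_label', inner-merge, then rename the merged column back to
# the name downstream models expect.
features.drop(columns=['phone_esm_straw_appraisal_stressfulness_event_mean'], inplace=True)
features = features.merge(
    stress_events_targets[['label', 'appraisal_stressfulness_event']]
        .rename(columns={'label': 'local_segment_label'}),
    on=['local_segment_label'], how='inner'
).rename(columns={'appraisal_stressfulness_event': 'phone_esm_straw_appraisal_stressfulness_event_mean'})

print(features)

The same three-step override is repeated above for the appraisal_threat and appraisal_challenge targets; the inner merge also drops feature rows whose segment label has no stress event target.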
@@ -234,7 +238,7 @@ def impute(df, method='zero'):
         'knn': k_nearest(df)
     }[method]
 
-def graph_bf_af(features, phase_name, plt_flag=True):
+def graph_bf_af(features, phase_name, plt_flag=False):
     if plt_flag:
         sns.set(rc={"figure.figsize":(16, 8)})
         sns.heatmap(features.isna(), cbar=False) #features.select_dtypes(include=np.number)
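Since the default flipped from plt_flag=True to plt_flag=False, the missing-value heatmaps are now opt-in. Below is a small self-contained sketch of the helper as shown in the hunk, with a hypothetical savefig call added so the example writes the plot somewhere; that output path is an assumption, not part of the pipeline.

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

def graph_bf_af(features, phase_name, plt_flag=False):
    # Plot the missing-value heatmap only when explicitly requested.
    if plt_flag:
        sns.set(rc={"figure.figsize": (16, 8)})
        sns.heatmap(features.isna(), cbar=False)
        plt.savefig(f"{phase_name}_na_heatmap.png")  # hypothetical output path
        plt.close()

# Toy frame with a few NaNs, just to exercise the helper.
toy = pd.DataFrame({"a": [1.0, np.nan, 3.0], "b": [np.nan, 2.0, 2.0]})
graph_bf_af(toy, "1target_rows_before")                 # new default: no plot
graph_bf_af(toy, "1target_rows_before", plt_flag=True)  # opt in where the heatmap is still wanted

Existing call sites such as graph_bf_af(features, "1target_rows_before") keep working, but they silently stop plotting unless they pass plt_flag=True.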
@@ -42,7 +42,7 @@ def straw_features(sensor_data_files, time_segment, provider, filter_data_by_seg
     requested_features = provider["FEATURES"]
     # name of the features this function can compute
     requested_scales = provider["SCALES"]
-    base_features_names = ["PANAS_positive_affect", "PANAS_negative_affect", "JCQ_job_demand", "JCQ_job_control", "JCQ_supervisor_support", "JCQ_coworker_support", \
+    base_features_names = ["PANAS_positive_affect", "PANAS_negative_affect", "JCQ_job_demand", "JCQ_job_control", "JCQ_supervisor_support", "JCQ_coworker_support",
                         "appraisal_stressfulness_period", "appraisal_stressfulness_event", "appraisal_threat", "appraisal_challenge"]
     #TODO Check valid questionnaire and feature names.
     # the subset of requested features this function can compute
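The trailing comment hints at the next step; outside this hunk, RAPIDS feature providers typically intersect the requested names with the ones the function supports. The sketch below shows that usual pattern with made-up requested values; it is an assumption, not code shown in this diff.

# Keep only requested names the provider actually knows how to compute.
base_features_names = ["PANAS_positive_affect", "PANAS_negative_affect", "JCQ_job_demand", "JCQ_job_control",
                       "JCQ_supervisor_support", "JCQ_coworker_support", "appraisal_stressfulness_period",
                       "appraisal_stressfulness_event", "appraisal_threat", "appraisal_challenge"]
requested_features = ["PANAS_positive_affect", "appraisal_threat", "not_a_real_feature"]  # invented example

features_to_compute = [feature for feature in requested_features if feature in base_features_names]
print(features_to_compute)  # ['PANAS_positive_affect', 'appraisal_threat']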
@@ -122,12 +122,20 @@ def extract_ers(esm_df):
         session_end_timestamp = esm_df.groupby(['device_id', 'esm_session'])['timestamp'].max().to_frame().rename(columns={'timestamp': 'session_end_timestamp'}) # questionnaire end timestamp
         se_time = esm_df[esm_df.questionnaire_id == 90.].set_index(['device_id', 'esm_session'])['esm_user_answer'].to_frame().rename(columns={'esm_user_answer': 'se_time'})
         se_duration = esm_df[esm_df.questionnaire_id == 91.].set_index(['device_id', 'esm_session'])['esm_user_answer'].to_frame().rename(columns={'esm_user_answer': 'se_duration'})
-        se_intensity = esm_df[esm_df.questionnaire_id == 87.].set_index(['device_id', 'esm_session'])['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'intensity'})
 
+        # Extract the 3 targets that will be transferred with the csv file to the cleaning script.
+        se_stressfulness_event_tg = esm_df[esm_df.questionnaire_id == 87.].set_index(['device_id', 'esm_session'])['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_stressfulness_event'})
+        se_threat_tg = esm_df[esm_df.questionnaire_id == 88.].groupby(["device_id", "esm_session"]).mean()['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_threat'})
+        se_challenge_tg = esm_df[esm_df.questionnaire_id == 89.].groupby(["device_id", "esm_session"]).mean()['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_challenge'})
 
+        # All relevant features are joined with an inner join to remove standalone columns (e.g., the stressfulness event target has a larger count)
         extracted_ers = extracted_ers.join(session_end_timestamp, on=['device_id', 'esm_session'], how='inner') \
             .join(se_time, on=['device_id', 'esm_session'], how='inner') \
             .join(se_duration, on=['device_id', 'esm_session'], how='inner') \
-            .join(se_intensity, on=['device_id', 'esm_session'], how='inner')
+            .join(se_stressfulness_event_tg, on=['device_id', 'esm_session'], how='inner') \
+            .join(se_threat_tg, on=['device_id', 'esm_session'], how='inner') \
+            .join(se_challenge_tg, on=['device_id', 'esm_session'], how='inner')
 
         # Filter sessions that are not useful. Because of the ambiguity this excludes:
         # (1) straw event times that are marked as "0 - I don't remember"
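To make the new target extraction concrete, here is a minimal sketch with a made-up esm_df (device ids, sessions, and answers are invented): questionnaire 87 contributes one stressfulness answer per session, questionnaires 88 and 89 are averaged per session, and inner joins keep only sessions that have all three targets.

import pandas as pd

# Toy long-format ESM frame; all values below are invented.
esm_df = pd.DataFrame({
    'device_id':               ['d1', 'd1', 'd1', 'd1', 'd2'],
    'esm_session':             [1, 1, 1, 1, 7],
    'questionnaire_id':        [87., 88., 88., 89., 87.],
    'esm_user_answer_numeric': [3, 2, 4, 1, 5],
})

# Questionnaire 87: one stressfulness answer per session, so the index can be set directly.
se_stressfulness_event_tg = (esm_df[esm_df.questionnaire_id == 87.]
    .set_index(['device_id', 'esm_session'])['esm_user_answer_numeric']
    .to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_stressfulness_event'}))

# Questionnaires 88/89: possibly several items per session, so take the session mean
# (equivalent to the script's .mean()['esm_user_answer_numeric'] selection).
se_threat_tg = (esm_df[esm_df.questionnaire_id == 88.]
    .groupby(['device_id', 'esm_session'])['esm_user_answer_numeric'].mean()
    .to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_threat'}))
se_challenge_tg = (esm_df[esm_df.questionnaire_id == 89.]
    .groupby(['device_id', 'esm_session'])['esm_user_answer_numeric'].mean()
    .to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_challenge'}))

# Inner joins keep only sessions with all three targets: d2's session 7 has no
# threat/challenge answers and is dropped, mirroring the how='inner' joins above.
targets = se_stressfulness_event_tg.join(se_threat_tg, how='inner').join(se_challenge_tg, how='inner')
print(targets)

In the script the same joins hang off extracted_ers with on=['device_id', 'esm_session'], so segments whose session lacks any of the targets fall out of the ERS file as well.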
@@ -163,8 +171,8 @@ def extract_ers(esm_df):
 
         extracted_ers["label"] = f"straw_event_{segmenting_method}_" + snakemake.params["pid"] + "_" + extracted_ers.index.astype(str).str.zfill(3)
 
-        # Write the csv of extracted ERS labels with targets (stress event intensity)
-        extracted_ers[["label", "intensity"]].to_csv(snakemake.output[1], index=False)
+        # Write the csv of extracted ERS labels with targets related to stressfulness event
+        extracted_ers[["label", "appraisal_stressfulness_event", "appraisal_threat", "appraisal_challenge"]].to_csv(snakemake.output[1], index=False)
 
     else:
         raise Exception("Please select correct target method for the event-related segments.")
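A small sketch of the handoff this write creates: only the label plus the three appraisal targets end up in the per-participant csv, which the cleaning hunk above later reads back as data/external/stress_event_targets.csv. The frame contents and the output file name below are invented stand-ins (the real path comes from snakemake.output[1]).

import pandas as pd

# Hypothetical extracted_ers frame right before the write; extra columns are not exported.
extracted_ers = pd.DataFrame({
    'label': ['straw_event_stress_event_p01_000', 'straw_event_stress_event_p01_001'],
    'appraisal_stressfulness_event': [3.0, 1.0],
    'appraisal_threat': [2.5, 0.5],
    'appraisal_challenge': [1.5, 2.0],
    'event_timestamp': [1656000000000, 1656100000000],
})

out_path = "stress_event_targets_p01.csv"  # stand-in for snakemake.output[1]
extracted_ers[["label", "appraisal_stressfulness_event", "appraisal_threat", "appraisal_challenge"]] \
    .to_csv(out_path, index=False)

print(pd.read_csv(out_path).columns.tolist())
# ['label', 'appraisal_stressfulness_event', 'appraisal_threat', 'appraisal_challenge']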
@@ -194,7 +202,7 @@ elif snakemake.params["stage"] == "merge":
 
     input_data_files = dict(snakemake.input)
     straw_events = pd.DataFrame(columns=["label", "event_timestamp", "length", "shift", "shift_direction", "device_id"])
-    stress_events_targets = pd.DataFrame(columns=["label", "intensity"])
+    stress_events_targets = pd.DataFrame(columns=["label", "appraisal_stressfulness_event", "appraisal_threat", "appraisal_challenge"])
 
     for input_file in input_data_files["ers_files"]:
         ers_df = pd.read_csv(input_file)