Modify the stress_event logic so that it also includes cases where stressfulness is 0.
parent
87e5209a9f
commit
3b2001f570
|
@ -3,7 +3,7 @@
|
||||||
########################################################################################################################
|
########################################################################################################################
|
||||||
|
|
||||||
# See https://www.rapids.science/latest/setup/configuration/#participant-files
|
# See https://www.rapids.science/latest/setup/configuration/#participant-files
|
||||||
PIDS: ['p031', 'p032', 'p033', 'p034', 'p035', 'p036', 'p037', 'p038', 'p039', 'p040', 'p042', 'p043', 'p044', 'p045', 'p046', 'p049', 'p050', 'p052', 'p053', 'p054', 'p055', 'p057', 'p058', 'p059', 'p060', 'p061', 'p062', 'p064', 'p067', 'p068', 'p069', 'p070', 'p071', 'p072', 'p073', 'p074', 'p075', 'p076', 'p077', 'p078', 'p079', 'p080', 'p081', 'p082', 'p083', 'p084', 'p085', 'p086', 'p088', 'p089', 'p090', 'p091', 'p092', 'p093', 'p106', 'p107']
|
PIDS: ['p03'] #['p031', 'p032', 'p033', 'p034', 'p035', 'p036', 'p037', 'p038', 'p039', 'p040', 'p042', 'p043', 'p044', 'p045', 'p046', 'p049', 'p050', 'p052', 'p053', 'p054', 'p055', 'p057', 'p058', 'p059', 'p060', 'p061', 'p062', 'p064', 'p067', 'p068', 'p069', 'p070', 'p071', 'p072', 'p073', 'p074', 'p075', 'p076', 'p077', 'p078', 'p079', 'p080', 'p081', 'p082', 'p083', 'p084', 'p085', 'p086', 'p088', 'p089', 'p090', 'p091', 'p092', 'p093', 'p106', 'p107']
|
||||||
|
|
||||||
# See https://www.rapids.science/latest/setup/configuration/#automatic-creation-of-participant-files
|
# See https://www.rapids.science/latest/setup/configuration/#automatic-creation-of-participant-files
|
||||||
CREATE_PARTICIPANT_FILES:
|
CREATE_PARTICIPANT_FILES:
|
||||||
|
@ -26,7 +26,7 @@ TIME_SEGMENTS: &time_segments
|
||||||
INCLUDE_PAST_PERIODIC_SEGMENTS: TRUE # Only relevant if TYPE=PERIODIC, see docs
|
INCLUDE_PAST_PERIODIC_SEGMENTS: TRUE # Only relevant if TYPE=PERIODIC, see docs
|
||||||
TAILORED_EVENTS: # Only relevant if TYPE=EVENT
|
TAILORED_EVENTS: # Only relevant if TYPE=EVENT
|
||||||
COMPUTE: True
|
COMPUTE: True
|
||||||
SEGMENTING_METHOD: "30_before" # 30_before, 90_before, stress_event
|
SEGMENTING_METHOD: "stress_event" # 30_before, 90_before, stress_event
|
||||||
|
|
||||||
# See https://www.rapids.science/latest/setup/configuration/#timezone-of-your-study
|
# See https://www.rapids.science/latest/setup/configuration/#timezone-of-your-study
|
||||||
TIMEZONE:
|
TIMEZONE:
|
||||||
|
@ -733,7 +733,6 @@ PARAMS_FOR_ANALYSIS:
|
||||||
TARGET:
|
TARGET:
|
||||||
COMPUTE: True
|
COMPUTE: True
|
||||||
LABEL: appraisal_stressfulness_event_mean
|
LABEL: appraisal_stressfulness_event_mean
|
||||||
ALL_LABELS: [PANAS_positive_affect_mean, PANAS_negative_affect_mean, JCQ_job_demand_mean, JCQ_job_control_mean, JCQ_supervisor_support_mean,
|
ALL_LABELS: [appraisal_stressfulness_event_mean, appraisal_threat_mean, appraisal_challenge_mean]
|
||||||
JCQ_coworker_support_mean, appraisal_stressfulness_period_mean, appraisal_stressfulness_event_mean, appraisal_threat_mean, appraisal_challenge_mean]
|
|
||||||
# PANAS_positive_affect_mean, PANAS_negative_affect_mean, JCQ_job_demand_mean, JCQ_job_control_mean, JCQ_supervisor_support_mean,
|
# PANAS_positive_affect_mean, PANAS_negative_affect_mean, JCQ_job_demand_mean, JCQ_job_control_mean, JCQ_supervisor_support_mean,
|
||||||
# JCQ_coworker_support_mean, appraisal_stressfulness_period_mean, appraisal_stressfulness_event_mean, appraisal_threat_mean, appraisal_challenge_mean
|
# JCQ_coworker_support_mean, appraisal_stressfulness_period_mean, appraisal_stressfulness_event_mean, appraisal_threat_mean, appraisal_challenge_mean
|
||||||
|
|
|
@ -49,13 +49,13 @@ def extract_ers(esm_df):
|
||||||
extracted_ers (DataFrame): dataframe with all necessary information to write event-related segments file
|
extracted_ers (DataFrame): dataframe with all necessary information to write event-related segments file
|
||||||
in the correct format.
|
in the correct format.
|
||||||
"""
|
"""
|
||||||
pd.set_option("display.max_rows", 20)
|
pd.set_option("display.max_rows", 50)
|
||||||
pd.set_option("display.max_columns", None)
|
pd.set_option("display.max_columns", None)
|
||||||
|
|
||||||
with open('config.yaml', 'r') as stream:
|
with open('config.yaml', 'r') as stream:
|
||||||
config = yaml.load(stream, Loader=yaml.FullLoader)
|
config = yaml.load(stream, Loader=yaml.FullLoader)
|
||||||
|
|
||||||
pd.DataFrame(columns=["label", "intensity"]).to_csv(snakemake.output[1]) # Create an empty stress_events_targets file
|
pd.DataFrame(columns=["label"]).to_csv(snakemake.output[1]) # Create an empty stress_events_targets file
|
||||||
|
|
||||||
esm_preprocessed = clean_up_esm(preprocess_esm(esm_df))
|
esm_preprocessed = clean_up_esm(preprocess_esm(esm_df))
|
||||||
|
|
||||||
|
@ -114,15 +114,21 @@ def extract_ers(esm_df):
|
||||||
possibility of the participant not remembering the start time precisely => this parameter can be manipulated with the variable
|
possibility of the participant not remembering the start time precisely => this parameter can be manipulated with the variable
|
||||||
"time_before_event" which is defined below.
|
"time_before_event" which is defined below.
|
||||||
|
|
||||||
|
In case the participant marked that no stressful event happened, the default of 30 minutes before the event is chosen.
|
||||||
|
In this case, se_threat and se_challenge are NaN.
|
||||||
|
|
||||||
By default, this method also excludes all events that are longer than 2.5 hours so that the segments are easily comparable.
|
By default, this method also excludes all events that are longer than 2.5 hours so that the segments are easily comparable.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Get and join required data
|
# Get and join required data
|
||||||
extracted_ers = esm_df.groupby(["device_id", "esm_session"])['timestamp'].apply(lambda x: math.ceil((x.max() - x.min()) / 1000)).reset_index().rename(columns={'timestamp': 'session_length'}) # questionnaire end timestamp
|
extracted_ers = esm_df.groupby(["device_id", "esm_session"])['timestamp'].apply(lambda x: math.ceil((x.max() - x.min()) / 1000)).reset_index().rename(columns={'timestamp': 'session_length'}) # questionnaire length
|
||||||
extracted_ers = extracted_ers[extracted_ers["session_length"] <= 15 * 60].reset_index(drop=True) # ensure that the longest duration of the questionnaire anwsering is 15 min
|
extracted_ers = extracted_ers[extracted_ers["session_length"] <= 15 * 60].reset_index(drop=True) # ensure that the longest duration of the questionnaire answering is 15 min
|
||||||
session_end_timestamp = esm_df.groupby(['device_id', 'esm_session'])['timestamp'].max().to_frame().rename(columns={'timestamp': 'session_end_timestamp'}) # questionnaire end timestamp
|
session_end_timestamp = esm_df.groupby(['device_id', 'esm_session'])['timestamp'].max().to_frame().rename(columns={'timestamp': 'session_end_timestamp'}) # questionnaire end timestamp
|
||||||
|
|
||||||
se_time = esm_df[esm_df.questionnaire_id == 90.].set_index(['device_id', 'esm_session'])['esm_user_answer'].to_frame().rename(columns={'esm_user_answer': 'se_time'})
|
se_time = esm_df[esm_df.questionnaire_id == 90.].set_index(['device_id', 'esm_session'])['esm_user_answer'].to_frame().rename(columns={'esm_user_answer': 'se_time'})
|
||||||
se_duration = esm_df[esm_df.questionnaire_id == 91.].set_index(['device_id', 'esm_session'])['esm_user_answer'].to_frame().rename(columns={'esm_user_answer': 'se_duration'})
|
se_duration = esm_df[esm_df.questionnaire_id == 91.].set_index(['device_id', 'esm_session'])['esm_user_answer'].to_frame().rename(columns={'esm_user_answer': 'se_duration'})
|
||||||
|
|
||||||
|
|
||||||
# Extract 3 targets that will be transferred with the csv file to the cleaning script.
|
# Extract 3 targets that will be transferred with the csv file to the cleaning script.
|
||||||
se_stressfulness_event_tg = esm_df[esm_df.questionnaire_id == 87.].set_index(['device_id', 'esm_session'])['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_stressfulness_event'})
|
se_stressfulness_event_tg = esm_df[esm_df.questionnaire_id == 87.].set_index(['device_id', 'esm_session'])['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_stressfulness_event'})
|
||||||
se_threat_tg = esm_df[esm_df.questionnaire_id == 88.].groupby(["device_id", "esm_session"]).mean()['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_threat'})
|
se_threat_tg = esm_df[esm_df.questionnaire_id == 88.].groupby(["device_id", "esm_session"]).mean()['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_threat'})
|
||||||
|
@ -130,35 +136,40 @@ def extract_ers(esm_df):
|
||||||
|
|
||||||
# All relevant features are joined by inner join to remove standalone columns (e.g., stressfulness event target has larger count)
|
# All relevant features are joined by inner join to remove standalone columns (e.g., stressfulness event target has larger count)
|
||||||
extracted_ers = extracted_ers.join(session_end_timestamp, on=['device_id', 'esm_session'], how='inner') \
|
extracted_ers = extracted_ers.join(session_end_timestamp, on=['device_id', 'esm_session'], how='inner') \
|
||||||
.join(se_time, on=['device_id', 'esm_session'], how='inner') \
|
|
||||||
.join(se_duration, on=['device_id', 'esm_session'], how='inner') \
|
|
||||||
.join(se_stressfulness_event_tg, on=['device_id', 'esm_session'], how='inner') \
|
.join(se_stressfulness_event_tg, on=['device_id', 'esm_session'], how='inner') \
|
||||||
.join(se_threat_tg, on=['device_id', 'esm_session'], how='inner') \
|
.join(se_time, on=['device_id', 'esm_session'], how='left') \
|
||||||
.join(se_challenge_tg, on=['device_id', 'esm_session'], how='inner')
|
.join(se_duration, on=['device_id', 'esm_session'], how='left') \
|
||||||
|
.join(se_threat_tg, on=['device_id', 'esm_session'], how='left') \
|
||||||
|
.join(se_challenge_tg, on=['device_id', 'esm_session'], how='left')
|
||||||
|
|
||||||
|
# Filter-out the sessions that are not useful. Because of the ambiguity this excludes:
|
||||||
# Filter sessions that are not useful. Because of the ambiguity this excludes:
|
|
||||||
# (1) straw event times that are marked as "0 - I don't remember"
|
# (1) straw event times that are marked as "0 - I don't remember"
|
||||||
# (2) straw event durations that are marked as "0 - I don't remember"
|
# (2) straw event durations that are marked as "0 - I don't remember"
|
||||||
extracted_ers = extracted_ers[(~extracted_ers.se_time.str.startswith("0 - ")) & (~extracted_ers.se_duration.str.startswith("0 - "))]
|
extracted_ers = extracted_ers[(~extracted_ers.se_time.astype(str).str.startswith("0 - ")) & (~extracted_ers.se_duration.astype(str).str.startswith("0 - "))]
|
||||||
|
|
||||||
# Transform data into its final form, ready for the extraction
|
|
||||||
extracted_ers.reset_index(drop=True, inplace=True)
|
extracted_ers.reset_index(drop=True, inplace=True)
|
||||||
|
|
||||||
time_before_event = 5 * 60 # in seconds (5 minutes)
|
# Add a default duration in case the participant answered that no stressful event occurred
|
||||||
extracted_ers['event_timestamp'] = pd.to_datetime(extracted_ers['se_time']).apply(lambda x: x.timestamp() * 1000).astype('int64')
|
def_time_before_questionnaire = 25 * 60 # in seconds (25 minutes.. 5 minutes will be added later) - * 1000 to standardize it in miliseconds
|
||||||
|
extracted_ers["se_duration"] = extracted_ers["se_duration"].fillna((extracted_ers["session_length"] + def_time_before_questionnaire).astype(int) * 1000)
|
||||||
|
|
||||||
|
# Prepare data to fit the data structure in the CSV file ...
|
||||||
|
# Add the event time as the end of the questionnaire if no stress event occurred
|
||||||
|
extracted_ers['se_time'] = extracted_ers['se_time'].fillna(extracted_ers['session_end_timestamp'])
|
||||||
|
# The type can be an int (timestamp [ms]), which stays the same, or a datetime str, which is converted to a timestamp in milliseconds
|
||||||
|
extracted_ers['event_timestamp'] = extracted_ers['se_time'].apply(lambda x: x if isinstance(x, int) else pd.to_datetime(x).timestamp() * 1000).astype('int64')
|
||||||
extracted_ers['shift_direction'] = -1
|
extracted_ers['shift_direction'] = -1
|
||||||
|
|
||||||
# Checks whether the duration is marked with "1 - It's still ongoing" which means that the end of the current questionnaire
|
# Checks whether the duration is marked with "1 - It's still ongoing" which means that the end of the current questionnaire
|
||||||
# is taken as end time of the segment. Else the user input duration is taken.
|
# is taken as end time of the segment. Else the user input duration is taken.
|
||||||
extracted_ers['se_duration'] = \
|
extracted_ers['se_duration'] = \
|
||||||
np.where(
|
np.where(
|
||||||
extracted_ers['se_duration'].str.startswith("1 - "),
|
extracted_ers['se_duration'].astype(str).str.startswith("1 - "),
|
||||||
extracted_ers['session_end_timestamp'] - extracted_ers['event_timestamp'],
|
extracted_ers['session_end_timestamp'] - extracted_ers['event_timestamp'],
|
||||||
extracted_ers['se_duration']
|
extracted_ers['se_duration']
|
||||||
)
|
)
|
||||||
|
|
||||||
# This converts the rows of timestamps in milliseconds and the row with datetime to timestamp in seconds.
|
# This converts the rows of timestamps in milliseconds and the rows with datetime... to timestamp in seconds.
|
||||||
|
time_before_event = 5 * 60 # in seconds (5 minutes)
|
||||||
extracted_ers['se_duration'] = \
|
extracted_ers['se_duration'] = \
|
||||||
extracted_ers['se_duration'].apply(lambda x: math.ceil(x / 1000) if isinstance(x, int) else (pd.to_datetime(x).hour * 60 + pd.to_datetime(x).minute) * 60) + time_before_event
|
extracted_ers['se_duration'].apply(lambda x: math.ceil(x / 1000) if isinstance(x, int) else (pd.to_datetime(x).hour * 60 + pd.to_datetime(x).minute) * 60) + time_before_event
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue