Make stress events equal in duration.

sociality-task
Primoz 2022-12-14 14:52:20 +00:00
parent 3ce7f2c2a5
commit 7f5a4e6744
3 changed files with 46 additions and 15 deletions

View File

@@ -27,6 +27,8 @@ TIME_SEGMENTS: &time_segments
TAILORED_EVENTS: # Only relevant if TYPE=EVENT
COMPUTE: True
SEGMENTING_METHOD: "stress_event" # 30_before, 90_before, stress_event
INTERVAL_OF_INTEREST: 5 # duration of event of interest [minutes]
INTERVAL_OF_INTEREST_PADDING: 7.5 # interval of interest padding (before and after IOI) [minutes]
# See https://www.rapids.science/latest/setup/configuration/#timezone-of-your-study
TIMEZONE:
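With these defaults, every stress-event segment gets a fixed length of INTERVAL_OF_INTEREST + 2 * INTERVAL_OF_INTEREST_PADDING. A quick sanity check of the arithmetic (a standalone sketch, not part of the pipeline):

# Segment length implied by the config above: the IOI plus padding on both sides.
ioi_min = 5          # INTERVAL_OF_INTEREST [minutes]
padding_min = 7.5    # INTERVAL_OF_INTEREST_PADDING [minutes]
segment_min = ioi_min + 2 * padding_min
print(segment_min, segment_min * 60)   # 20.0 minutes -> 1200.0 seconds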

View File

@@ -23,6 +23,9 @@ def straw_cleaning(sensor_data_files, provider, target):
graph_bf_af(features, "1target_rows_before")
# TODO: need to check whether all of the participants contain E4 columns
sys.exit()
# (1.0) OVERRIDE STRESSFULNESS EVENT TARGETS IF ERS SEGMENTING_METHOD IS "STRESS_EVENT"
if config["TIME_SEGMENTS"]["TAILORED_EVENTS"]["SEGMENTING_METHOD"] == "stress_event":

View File

@@ -49,7 +49,8 @@ def extract_ers(esm_df):
extracted_ers (DataFrame): dataframe with all the necessary information to write the event-related segments file
in the correct format.
"""
pd.set_option("display.max_rows", 50)
pd.set_option("display.max_rows", 100)
pd.set_option("display.max_columns", None)
with open('config.yaml', 'r') as stream:
@@ -105,7 +106,9 @@ def extract_ers(esm_df):
extracted_ers["shift"] = extracted_ers["diffs"].apply(lambda x: format_timestamp(x))
elif segmenting_method == "stress_event":
"""This is a special case of the method as it consists of two important parts:
"""
TODO: update documentation for this condition
This is a special case of the method as it consists of two important parts:
(1) Generating the ERS file (same as in the methods above), and
(2) Generating the targets file along with the correct time segment labels.
@@ -120,22 +123,29 @@ def extract_ers(esm_df):
By default, this method also excludes all events that are longer than 2.5 hours so that the segments are easily comparable.
"""
ioi = config["TIME_SEGMENTS"]["TAILORED_EVENTS"]["INTERVAL_OF_INTEREST"] * 60 # interval of interest in seconds
ioi_padding = config["TIME_SEGMENTS"]["TAILORED_EVENTS"]["INTERVAL_OF_INTEREST_PADDING"] * 60 # interval of interest padding in seconds
# Get and join required data
extracted_ers = esm_df.groupby(["device_id", "esm_session"])['timestamp'].apply(lambda x: math.ceil((x.max() - x.min()) / 1000)).reset_index().rename(columns={'timestamp': 'session_length'}) # questionnaire length
extracted_ers = extracted_ers[extracted_ers["session_length"] <= 15 * 60].reset_index(drop=True) # ensure that answering the questionnaire took no longer than 15 min
session_start_timestamp = esm_df.groupby(['device_id', 'esm_session'])['timestamp'].min().to_frame().rename(columns={'timestamp': 'session_start_timestamp'}) # questionnaire start timestamp
session_end_timestamp = esm_df.groupby(['device_id', 'esm_session'])['timestamp'].max().to_frame().rename(columns={'timestamp': 'session_end_timestamp'}) # questionnaire end timestamp
# Users' answers for the stressfulness event (se) start times and durations
se_time = esm_df[esm_df.questionnaire_id == 90.].set_index(['device_id', 'esm_session'])['esm_user_answer'].to_frame().rename(columns={'esm_user_answer': 'se_time'})
se_duration = esm_df[esm_df.questionnaire_id == 91.].set_index(['device_id', 'esm_session'])['esm_user_answer'].to_frame().rename(columns={'esm_user_answer': 'se_duration'})
# Adjust se_duration values to the appropriate lengths
# Extracted 3 targets that will be transferred with the csv file to the cleaning script.
# Extracted 3 targets that will be transferred in the csv file to the cleaning script.
se_stressfulness_event_tg = esm_df[esm_df.questionnaire_id == 87.].set_index(['device_id', 'esm_session'])['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_stressfulness_event'})
se_threat_tg = esm_df[esm_df.questionnaire_id == 88.].groupby(["device_id", "esm_session"]).mean()['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_threat'})
se_challenge_tg = esm_df[esm_df.questionnaire_id == 89.].groupby(["device_id", "esm_session"]).mean()['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_challenge'})
# All relevant features are joined with an inner join to remove standalone columns (e.g., the stressfulness event target has a larger count)
extracted_ers = extracted_ers.join(session_end_timestamp, on=['device_id', 'esm_session'], how='inner') \
extracted_ers = extracted_ers.join(session_start_timestamp, on=['device_id', 'esm_session'], how='inner') \
.join(session_end_timestamp, on=['device_id', 'esm_session'], how='inner') \
.join(se_stressfulness_event_tg, on=['device_id', 'esm_session'], how='inner') \
.join(se_time, on=['device_id', 'esm_session'], how='left') \
.join(se_duration, on=['device_id', 'esm_session'], how='left') \
@@ -149,16 +159,17 @@ def extract_ers(esm_df):
extracted_ers.reset_index(drop=True, inplace=True)
# Add a default duration in case the participant answered that no stressful event occurred
def_time_before_questionnaire = 25 * 60 # in seconds (25 minutes; 5 minutes will be added later; * 1000 below to standardize to milliseconds)
extracted_ers["se_duration"] = extracted_ers["se_duration"].fillna((extracted_ers["session_length"] + def_time_before_questionnaire).astype(int) * 1000)
extracted_ers["se_duration"] = extracted_ers["se_duration"].fillna(int((ioi + 2*ioi_padding) * 1000))
# Prepare data to fit the data structure in the CSV file ...
# Fall back to the questionnaire start as the event time if no stressful event occurred
extracted_ers['se_time'] = extracted_ers['se_time'].fillna(extracted_ers['session_end_timestamp'])
extracted_ers['se_time'] = extracted_ers['se_time'].fillna(extracted_ers['session_start_timestamp'])
# The type can be an int (timestamp [ms]), which stays the same, or a datetime str, which is converted to a timestamp in milliseconds
extracted_ers['event_timestamp'] = extracted_ers['se_time'].apply(lambda x: x if isinstance(x, int) else pd.to_datetime(x).timestamp() * 1000).astype('int64')
extracted_ers['shift_direction'] = -1
""">>>>> begin section (could be optimized) <<<<<"""
# Check whether the duration is marked with "1 - It's still ongoing", in which case the end of the current questionnaire
# is taken as the end time of the segment. Otherwise, the user-input duration is taken.
extracted_ers['se_duration'] = \
@@ -169,14 +180,29 @@ def extract_ers(esm_df):
)
# Convert rows with timestamps in milliseconds, and rows with datetime strings, to timestamps in seconds.
time_before_event = 5 * 60 # in seconds (5 minutes)
extracted_ers['se_duration'] = \
extracted_ers['se_duration'].apply(lambda x: math.ceil(x / 1000) if isinstance(x, int) else (pd.to_datetime(x).hour * 60 + pd.to_datetime(x).minute) * 60) + time_before_event
extracted_ers['se_duration'].apply(lambda x: math.ceil(x / 1000) if isinstance(x, int) else (pd.to_datetime(x).hour * 60 + pd.to_datetime(x).minute) * 60)
extracted_ers['shift'] = format_timestamp(time_before_event)
extracted_ers['length'] = extracted_ers['se_duration'].apply(lambda x: format_timestamp(x))
# Check that the minimum se_duration is at least as long as the ioi; filter out the rest.
extracted_ers = extracted_ers[extracted_ers["se_duration"] >= ioi].reset_index(drop=True)
# Drop event_timestamp duplicates in case of user referencing the same event over multiple questionnaires
""">>>>> end section <<<<<"""
# Override all remaining durations so that every event has the same length
extracted_ers['se_duration'] = ioi + 2*ioi_padding
# If no stressful event occurred, place the whole segment before the questionnaire start; otherwise pad ioi_padding before the reported event start
extracted_ers['shift'] = \
np.where(
extracted_ers['appraisal_stressfulness_event'] == 0,
extracted_ers['se_duration'],
ioi_padding
)
extracted_ers['shift'] = extracted_ers['shift'].apply(lambda x: format_timestamp(int(x)))
extracted_ers['length'] = extracted_ers['se_duration'].apply(lambda x: format_timestamp(int(x)))
# Drop event_timestamp duplicates in case the user referenced the same event over multiple questionnaires
extracted_ers.drop_duplicates(subset=["event_timestamp"], keep='first', inplace=True)
extracted_ers.reset_index(drop=True, inplace=True)
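Taken together, the new shift/length logic pins every segment to the same 20-minute window. A self-contained sketch of the arithmetic (format_timestamp is assumed here to render a second count as HH:MM:SS, matching its use above):

import numpy as np
import pandas as pd

def format_timestamp(seconds):
    # Assumed behaviour, for illustration only: seconds -> "HH:MM:SS".
    return f"{seconds // 3600:02d}:{(seconds % 3600) // 60:02d}:{seconds % 60:02d}"

ioi = 5 * 60                 # 300 s, INTERVAL_OF_INTEREST
ioi_padding = int(7.5 * 60)  # 450 s, INTERVAL_OF_INTEREST_PADDING

df = pd.DataFrame({"appraisal_stressfulness_event": [0, 2]})
df["se_duration"] = ioi + 2 * ioi_padding   # every event is exactly 1200 s long
df["shift"] = np.where(df["appraisal_stressfulness_event"] == 0,
                       df["se_duration"], ioi_padding)
print(df["shift"].apply(lambda x: format_timestamp(int(x))).tolist())
# ['00:20:00', '00:07:30'] -- rows without a stressful event start the segment a
# full segment-length before the questionnaire; real events get 7.5 minutes of
# padding before the reported start.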