diff --git a/config.yaml b/config.yaml index d52804fe..7a82190a 100644 --- a/config.yaml +++ b/config.yaml @@ -27,8 +27,8 @@ TIME_SEGMENTS: &time_segments TAILORED_EVENTS: # Only relevant if TYPE=EVENT COMPUTE: True SEGMENTING_METHOD: "stress_event" # 30_before, 90_before, stress_event - INTERVAL_OF_INTEREST: 5 # duration of event of interest [minutes] - INTERVAL_OF_INTEREST_PADDING: 7.5 # interval of interest padding (before and after IOI) [minutes] + INTERVAL_OF_INTEREST: 10 # duration of event of interest [minutes] + IOI_ERROR_TOLERANCE: 5 # interval of interest error tolerance (before and after IOI) [minutes] # See https://www.rapids.science/latest/setup/configuration/#timezone-of-your-study TIMEZONE: diff --git a/src/features/phone_esm/straw/process_user_event_related_segments.py b/src/features/phone_esm/straw/process_user_event_related_segments.py index cd091b2c..03eeb052 100644 --- a/src/features/phone_esm/straw/process_user_event_related_segments.py +++ b/src/features/phone_esm/straw/process_user_event_related_segments.py @@ -124,7 +124,7 @@ def extract_ers(esm_df): """ ioi = config["TIME_SEGMENTS"]["TAILORED_EVENTS"]["INTERVAL_OF_INTEREST"] * 60 # interval of interest in seconds - ioi_padding = config["TIME_SEGMENTS"]["TAILORED_EVENTS"]["INTERVAL_OF_INTEREST_PADDING"] * 60 # interval of interest padding in seconds + ioi_error_tolerance = config["TIME_SEGMENTS"]["TAILORED_EVENTS"]["IOI_ERROR_TOLERANCE"] * 60 # interval of interest error tolerance in seconds # Get and join required data extracted_ers = esm_df.groupby(["device_id", "esm_session"])['timestamp'].apply(lambda x: math.ceil((x.max() - x.min()) / 1000)).reset_index().rename(columns={'timestamp': 'session_length'}) # questionnaire length @@ -154,12 +154,13 @@ def extract_ers(esm_df): # Filter-out the sessions that are not useful. 
Because of the ambiguity this excludes: # (1) straw event times that are marked as "0 - I don't remember" - # (2) straw event durations that are marked as "0 - I don't remember" - extracted_ers = extracted_ers[(~extracted_ers.se_time.astype(str).str.startswith("0 - ")) & (~extracted_ers.se_duration.astype(str).str.startswith("0 - "))] + extracted_ers = extracted_ers[~extracted_ers.se_time.astype(str).str.startswith("0 - ")] extracted_ers.reset_index(drop=True, inplace=True) + extracted_ers.loc[extracted_ers.se_duration.astype(str).str.startswith("0 - "), 'se_duration'] = 0 + # Add default duration in case if participant answered that no stressful event occured - extracted_ers["se_duration"] = extracted_ers["se_duration"].fillna(int((ioi + 2*ioi_padding) * 1000)) + extracted_ers["se_duration"] = extracted_ers["se_duration"].fillna(int((ioi + 2*ioi_error_tolerance) * 1000)) # Prepare data to fit the data structure in the CSV file ... # Add the event time as the end of the questionnaire if no stress event occured @@ -183,20 +184,22 @@ def extract_ers(esm_df): extracted_ers['se_duration'] = \ extracted_ers['se_duration'].apply(lambda x: math.ceil(x / 1000) if isinstance(x, int) else (pd.to_datetime(x).hour * 60 + pd.to_datetime(x).minute) * 60) - # Check whether min se_duration is at least the same duration as the ioi. Filter-out the rest. - extracted_ers = extracted_ers[extracted_ers["se_duration"] >= ioi].reset_index(drop=True) + # Check explicitly whether min duration is at least 0. This will eliminate rows that would be investigated after the end of the questionnaire. + extracted_ers = extracted_ers[extracted_ers['session_end_timestamp'] - extracted_ers['event_timestamp'] >= 0] + # Double check whether min se_duration is at least 0. Filter-out the rest. Negative values are considered invalid. 
+ extracted_ers = extracted_ers[extracted_ers["se_duration"] >= 0].reset_index(drop=True) """>>>>> end section <<<<<""" - # Simply override all remaining durations to be of an equal amount - extracted_ers['se_duration'] = ioi + 2*ioi_padding + # Simply override all durations to be of an equal amount + extracted_ers['se_duration'] = ioi + 2*ioi_error_tolerance - # + # If target is 0 then shift by the total stress event duration, otherwise shift it by ioi_error_tolerance extracted_ers['shift'] = \ np.where( extracted_ers['appraisal_stressfulness_event'] == 0, extracted_ers['se_duration'], - ioi_padding + ioi_error_tolerance ) extracted_ers['shift'] = extracted_ers['shift'].apply(lambda x: format_timestamp(int(x))) @@ -214,7 +217,7 @@ def extract_ers(esm_df): else: raise Exception("Please select correct target method for the event-related segments.") extracted_ers = pd.DataFrame(columns=["label", "event_timestamp", "length", "shift", "shift_direction", "device_id"]) - + return extracted_ers[["label", "event_timestamp", "length", "shift", "shift_direction", "device_id"]]