diff --git a/config.yaml b/config.yaml index 949b7ab1..3c3feb73 100644 --- a/config.yaml +++ b/config.yaml @@ -26,7 +26,7 @@ TIME_SEGMENTS: &time_segments INCLUDE_PAST_PERIODIC_SEGMENTS: TRUE # Only relevant if TYPE=PERIODIC, see docs TAILORED_EVENTS: # Only relevant if TYPE=EVENT COMPUTE: True - TARGETS_METHOD: "stress_event" # 30_before, 90_before, stress_event + SEGMENTING_METHOD: "stress_event" # 30_before, 90_before, stress_event # See https://www.rapids.science/latest/setup/configuration/#timezone-of-your-study TIMEZONE: diff --git a/src/features/phone_esm/straw/process_user_event_related_segments.py b/src/features/phone_esm/straw/process_user_event_related_segments.py index 58af30d1..61559f35 100644 --- a/src/features/phone_esm/straw/process_user_event_related_segments.py +++ b/src/features/phone_esm/straw/process_user_event_related_segments.py @@ -35,11 +35,11 @@ def format_timestamp(x): def extract_ers(esm_df): """This method has two major functionalities: (1) It prepares STRAW event-related segments file with the use of esm file. The execution protocol is depended on - the targets method specified in the config.yaml file. + the segmenting method specified in the config.yaml file. (2) It prepares and writes csv with targets and corresponding time segments labels. This is later used in the overall cleaning script (straw). - Details about each target method are listed below by each corresponding condition. Refer to the RAPIDS documentation for the + Details about each segmenting method are listed below by each corresponding condition. 
Refer to the RAPIDS documentation for the ERS file format: https://www.rapids.science/1.9/setup/configuration/#time-segments -> event segments Args: @@ -64,20 +64,21 @@ def extract_ers(esm_df): esm_filtered_sessions = classified[classified["session_response"] == 'ema_completed'].reset_index()[['device_id', 'esm_session']] esm_df = esm_preprocessed.loc[(esm_preprocessed['device_id'].isin(esm_filtered_sessions['device_id'])) & (esm_preprocessed['esm_session'].isin(esm_filtered_sessions['esm_session']))] - targets_method = config["TIME_SEGMENTS"]["TAILORED_EVENTS"]["TARGETS_METHOD"] - if targets_method in ["30_before", "90_before"]: # takes 30-minute peroid before the questionnaire + the duration of the questionnaire + segmenting_method = config["TIME_SEGMENTS"]["TAILORED_EVENTS"]["SEGMENTING_METHOD"] + + if segmenting_method in ["30_before", "90_before"]: # takes 30-minute period before the questionnaire + the duration of the questionnaire """ '30-minutes and 90-minutes before' have the same fundamental logic with couple of deviations that will be explained below. Both take x-minute period before the questionnaire that is summed with the questionnaire duration. All questionnaire durations over 15 minutes are excluded from the querying. 
""" # Extract time-relevant information extracted_ers = esm_df.groupby(["device_id", "esm_session"])['timestamp'].apply(lambda x: math.ceil((x.max() - x.min()) / 1000)).reset_index() # questionnaire length - extracted_ers["label"] = f"straw_event_{targets_method}_" + snakemake.params["pid"] + "_" + extracted_ers.index.astype(str).str.zfill(3) + extracted_ers["label"] = f"straw_event_{segmenting_method}_" + snakemake.params["pid"] + "_" + extracted_ers.index.astype(str).str.zfill(3) extracted_ers[['event_timestamp', 'device_id']] = esm_df.groupby(["device_id", "esm_session"])['timestamp'].min().reset_index()[['timestamp', 'device_id']] extracted_ers = extracted_ers[extracted_ers["timestamp"] <= 15 * 60].reset_index(drop=True) # ensure that the longest duration of the questionnaire anwsering is 15 min extracted_ers["shift_direction"] = -1 - if targets_method == "30_before": + if segmenting_method == "30_before": """The method 30-minutes before simply takes 30 minutes before the questionnaire and sums it with the questionnaire duration. The timestamps are formatted with the help of format_timestamp() method. """ @@ -87,7 +88,7 @@ def extract_ers(esm_df): extracted_ers["shift"] = time_before_questionnaire extracted_ers["shift"] = extracted_ers["shift"].apply(lambda x: format_timestamp(x)) - elif targets_method == "90_before": + elif segmenting_method == "90_before": """The method 90-minutes before has an important condition. If the time between the current and the previous questionnaire is longer then 90 minutes it takes 90 minutes, otherwise it takes the original time difference between the questionnaires. 
""" @@ -103,7 +104,7 @@ def extract_ers(esm_df): extracted_ers["length"] = (extracted_ers["timestamp"] + extracted_ers["diffs"]).apply(lambda x: format_timestamp(x)) extracted_ers["shift"] = extracted_ers["diffs"].apply(lambda x: format_timestamp(x)) - elif targets_method == "stress_event": + elif segmenting_method == "stress_event": """This is a special case of the method as it consists of two important parts: (1) Generating of the ERS file (same as the methods above) and (2) Generating targets file alongside with the correct time segment labels. @@ -156,7 +157,7 @@ def extract_ers(esm_df): # Exclude events that are longer than 2.5 hours extracted_ers = extracted_ers[extracted_ers["se_duration"] <= 2.5 * 60 * 60].reset_index(drop=True) - extracted_ers["label"] = f"straw_event_{targets_method}_" + snakemake.params["pid"] + "_" + extracted_ers.index.astype(str).str.zfill(3) + extracted_ers["label"] = f"straw_event_{segmenting_method}_" + snakemake.params["pid"] + "_" + extracted_ers.index.astype(str).str.zfill(3) extracted_ers['shift'] = format_timestamp(time_before_event) extracted_ers['length'] = extracted_ers['se_duration'].apply(lambda x: format_timestamp(x)) @@ -172,8 +173,8 @@ def extract_ers(esm_df): """ Here the code is executed - this .py file is used both for extraction of the STRAW time_segments file for the individual -participant, and also for merging all participant's files into one combined file which is later used for assignments of the -time segments to all sensors. +participant, and also for merging all participants' files into one combined file which is later used for assigning the +time segments to all sensors. There are two files involved (see rules extract_event_information_from_esm and merge_event_related_segments_files in preprocessing.smk) (1) ERS file which contains all the information about the time segment timings and