Rename targets_method to segmenting_method (config key TARGETS_METHOD -> SEGMENTING_METHOD).

2022-11-14 15:07:36 +00:00 · 2022-11-14 15:07:36 +00:00 · bd41f42a5d
parent a543ce372f
commit bd41f42a5d
2 changed files with 13 additions and 12 deletions
--- a/config.yaml
+++ b/config.yaml
@ -26,7 +26,7 @@ TIME_SEGMENTS: &time_segments
  INCLUDE_PAST_PERIODIC_SEGMENTS: TRUE # Only relevant if TYPE=PERIODIC, see docs
  TAILORED_EVENTS: # Only relevant if TYPE=EVENT
    COMPUTE: True
-    TARGETS_METHOD: "stress_event" # 30_before, 90_before, stress_event
+    SEGMENTING_METHOD: "stress_event" # 30_before, 90_before, stress_event

 # See https://www.rapids.science/latest/setup/configuration/#timezone-of-your-study
 TIMEZONE: 
--- a/src/features/phone_esm/straw/process_user_event_related_segments.py
+++ b/src/features/phone_esm/straw/process_user_event_related_segments.py
@ -35,11 +35,11 @@ def format_timestamp(x):
 def extract_ers(esm_df):
    """This method has two major functionalities: 
        (1) It prepares STRAW event-related segments file with the use of esm file. The execution protocol is depended on 
-            the targets method specified in the config.yaml file.
+            the segmenting method specified in the config.yaml file.
        (2) It prepares and writes csv with targets and corresponding time segments labels. This is later used 
            in the overall cleaning script (straw).
    
-    Details about each target method are listed below by each corresponding condition. Refer to the RAPIDS documentation for the 
+    Details about each segmenting method are listed below by each corresponding condition. Refer to the RAPIDS documentation for the 
    ERS file format: https://www.rapids.science/1.9/setup/configuration/#time-segments -> event segments

    Args:
@ -64,20 +64,21 @@ def extract_ers(esm_df):
    esm_filtered_sessions = classified[classified["session_response"] == 'ema_completed'].reset_index()[['device_id', 'esm_session']]
    esm_df = esm_preprocessed.loc[(esm_preprocessed['device_id'].isin(esm_filtered_sessions['device_id'])) & (esm_preprocessed['esm_session'].isin(esm_filtered_sessions['esm_session']))]
    
-    targets_method = config["TIME_SEGMENTS"]["TAILORED_EVENTS"]["TARGETS_METHOD"]
-    if targets_method in ["30_before", "90_before"]: # takes 30-minute peroid before the questionnaire + the duration of the questionnaire
+    segmenting_method = config["TIME_SEGMENTS"]["TAILORED_EVENTS"]["SEGMENTING_METHOD"]
+    
+    if segmenting_method in ["30_before", "90_before"]: # takes 30-minute period before the questionnaire + the duration of the questionnaire
    """ '30-minutes and 90-minutes before' have the same fundamental logic with couple of deviations that will be explained below.
    Both take x-minute period before the questionnaire that is summed with the questionnaire duration.
    All questionnaire durations over 15 minutes are excluded from the querying.
    """
        # Extract time-relevant information
        extracted_ers = esm_df.groupby(["device_id", "esm_session"])['timestamp'].apply(lambda x: math.ceil((x.max() - x.min()) / 1000)).reset_index() # questionnaire length
-        extracted_ers["label"] = f"straw_event_{targets_method}_" + snakemake.params["pid"] + "_" + extracted_ers.index.astype(str).str.zfill(3) 
+        extracted_ers["label"] = f"straw_event_{segmenting_method}_" + snakemake.params["pid"] + "_" + extracted_ers.index.astype(str).str.zfill(3) 
        extracted_ers[['event_timestamp', 'device_id']] = esm_df.groupby(["device_id", "esm_session"])['timestamp'].min().reset_index()[['timestamp', 'device_id']]
        extracted_ers = extracted_ers[extracted_ers["timestamp"] <= 15 * 60].reset_index(drop=True) # ensure that the longest duration of the questionnaire anwsering is 15 min 
        extracted_ers["shift_direction"] = -1 

-        if targets_method == "30_before":
+        if segmenting_method == "30_before":
        """The method 30-minutes before simply takes 30 minutes before the questionnaire and sums it with the questionnaire duration.
        The timestamps are formatted with the help of format_timestamp() method.
        """
@ -87,7 +88,7 @@ def extract_ers(esm_df):
            extracted_ers["shift"] = time_before_questionnaire
            extracted_ers["shift"] = extracted_ers["shift"].apply(lambda x: format_timestamp(x))
        
-        elif targets_method == "90_before":
+        elif segmenting_method == "90_before":
        """The method 90-minutes before has an important condition. If the time between the current and the previous questionnaire is
        longer then 90 minutes it takes 90 minutes, otherwise it takes the original time difference between the questionnaires.
        """
@ -103,7 +104,7 @@ def extract_ers(esm_df):
            extracted_ers["length"] = (extracted_ers["timestamp"] + extracted_ers["diffs"]).apply(lambda x: format_timestamp(x))
            extracted_ers["shift"] = extracted_ers["diffs"].apply(lambda x: format_timestamp(x))

-    elif targets_method == "stress_event":
+    elif segmenting_method == "stress_event":
        """This is a special case of the method as it consists of two important parts:
            (1) Generating of the ERS file (same as the methods above) and
            (2) Generating targets file alongside with the correct time segment labels.
@ -156,7 +157,7 @@ def extract_ers(esm_df):
        # Exclude events that are longer than 2.5 hours
        extracted_ers = extracted_ers[extracted_ers["se_duration"] <= 2.5 * 60 * 60].reset_index(drop=True) 

-        extracted_ers["label"] = f"straw_event_{targets_method}_" + snakemake.params["pid"] + "_" + extracted_ers.index.astype(str).str.zfill(3)
+        extracted_ers["label"] = f"straw_event_{segmenting_method}_" + snakemake.params["pid"] + "_" + extracted_ers.index.astype(str).str.zfill(3)
        extracted_ers['shift'] = format_timestamp(time_before_event)
        extracted_ers['length'] = extracted_ers['se_duration'].apply(lambda x: format_timestamp(x))

@ -172,8 +173,8 @@ def extract_ers(esm_df):

 """
 Here the code is executed - this .py file is used both for extraction of the STRAW time_segments file for the individual
-participant, and also for merging all participant's files into one combined file which is later used for assignments of the
-time segments to all sensors.
+participant, and also for merging all participants' files into one combined file which is later used for assigning
+the time segments to all sensors.

 There are two files involved (see rules extract_event_information_from_esm and merge_event_related_segments_files in preprocessing.smk)
 (1) ERS file which contains all the information about the time segment timings and