Rename target_ to segmenting_ method.

imputation_and_cleaning
Primoz 2022-11-14 15:07:36 +00:00
parent a543ce372f
commit bd41f42a5d
2 changed files with 13 additions and 12 deletions

View File

@ -26,7 +26,7 @@ TIME_SEGMENTS: &time_segments
INCLUDE_PAST_PERIODIC_SEGMENTS: TRUE # Only relevant if TYPE=PERIODIC, see docs INCLUDE_PAST_PERIODIC_SEGMENTS: TRUE # Only relevant if TYPE=PERIODIC, see docs
TAILORED_EVENTS: # Only relevant if TYPE=EVENT TAILORED_EVENTS: # Only relevant if TYPE=EVENT
COMPUTE: True COMPUTE: True
TARGETS_METHOD: "stress_event" # 30_before, 90_before, stress_event SEGMENTING_METHOD: "stress_event" # 30_before, 90_before, stress_event
# See https://www.rapids.science/latest/setup/configuration/#timezone-of-your-study # See https://www.rapids.science/latest/setup/configuration/#timezone-of-your-study
TIMEZONE: TIMEZONE:

View File

@ -35,11 +35,11 @@ def format_timestamp(x):
def extract_ers(esm_df): def extract_ers(esm_df):
"""This method has two major functionalities: """This method has two major functionalities:
(1) It prepares STRAW event-related segments file with the use of esm file. The execution protocol depends on (1) It prepares STRAW event-related segments file with the use of esm file. The execution protocol depends on
the targets method specified in the config.yaml file. the segmenting method specified in the config.yaml file.
(2) It prepares and writes csv with targets and corresponding time segments labels. This is later used (2) It prepares and writes csv with targets and corresponding time segments labels. This is later used
in the overall cleaning script (straw). in the overall cleaning script (straw).
Details about each target method are listed below by each corresponding condition. Refer to the RAPIDS documentation for the Details about each segmenting method are listed below by each corresponding condition. Refer to the RAPIDS documentation for the
ERS file format: https://www.rapids.science/1.9/setup/configuration/#time-segments -> event segments ERS file format: https://www.rapids.science/1.9/setup/configuration/#time-segments -> event segments
Args: Args:
@ -64,20 +64,21 @@ def extract_ers(esm_df):
esm_filtered_sessions = classified[classified["session_response"] == 'ema_completed'].reset_index()[['device_id', 'esm_session']] esm_filtered_sessions = classified[classified["session_response"] == 'ema_completed'].reset_index()[['device_id', 'esm_session']]
esm_df = esm_preprocessed.loc[(esm_preprocessed['device_id'].isin(esm_filtered_sessions['device_id'])) & (esm_preprocessed['esm_session'].isin(esm_filtered_sessions['esm_session']))] esm_df = esm_preprocessed.loc[(esm_preprocessed['device_id'].isin(esm_filtered_sessions['device_id'])) & (esm_preprocessed['esm_session'].isin(esm_filtered_sessions['esm_session']))]
targets_method = config["TIME_SEGMENTS"]["TAILORED_EVENTS"]["TARGETS_METHOD"] segmenting_method = config["TIME_SEGMENTS"]["TAILORED_EVENTS"]["SEGMENTING_METHOD"]
if targets_method in ["30_before", "90_before"]: # takes 30-minute peroid before the questionnaire + the duration of the questionnaire
if segmenting_method in ["30_before", "90_before"]: # takes 30-minute peroid before the questionnaire + the duration of the questionnaire
""" '30-minutes and 90-minutes before' have the same fundamental logic with a couple of deviations that will be explained below. """ '30-minutes and 90-minutes before' have the same fundamental logic with a couple of deviations that will be explained below.
Both take x-minute period before the questionnaire that is summed with the questionnaire duration. Both take x-minute period before the questionnaire that is summed with the questionnaire duration.
All questionnaire durations over 15 minutes are excluded from the querying. All questionnaire durations over 15 minutes are excluded from the querying.
""" """
# Extract time-relevant information # Extract time-relevant information
extracted_ers = esm_df.groupby(["device_id", "esm_session"])['timestamp'].apply(lambda x: math.ceil((x.max() - x.min()) / 1000)).reset_index() # questionnaire length extracted_ers = esm_df.groupby(["device_id", "esm_session"])['timestamp'].apply(lambda x: math.ceil((x.max() - x.min()) / 1000)).reset_index() # questionnaire length
extracted_ers["label"] = f"straw_event_{targets_method}_" + snakemake.params["pid"] + "_" + extracted_ers.index.astype(str).str.zfill(3) extracted_ers["label"] = f"straw_event_{segmenting_method}_" + snakemake.params["pid"] + "_" + extracted_ers.index.astype(str).str.zfill(3)
extracted_ers[['event_timestamp', 'device_id']] = esm_df.groupby(["device_id", "esm_session"])['timestamp'].min().reset_index()[['timestamp', 'device_id']] extracted_ers[['event_timestamp', 'device_id']] = esm_df.groupby(["device_id", "esm_session"])['timestamp'].min().reset_index()[['timestamp', 'device_id']]
extracted_ers = extracted_ers[extracted_ers["timestamp"] <= 15 * 60].reset_index(drop=True) # ensure that the longest duration of the questionnaire anwsering is 15 min extracted_ers = extracted_ers[extracted_ers["timestamp"] <= 15 * 60].reset_index(drop=True) # ensure that the longest duration of the questionnaire anwsering is 15 min
extracted_ers["shift_direction"] = -1 extracted_ers["shift_direction"] = -1
if targets_method == "30_before": if segmenting_method == "30_before":
"""The method 30-minutes before simply takes 30 minutes before the questionnaire and sums it with the questionnaire duration. """The method 30-minutes before simply takes 30 minutes before the questionnaire and sums it with the questionnaire duration.
The timestamps are formatted with the help of format_timestamp() method. The timestamps are formatted with the help of format_timestamp() method.
""" """
@ -87,7 +88,7 @@ def extract_ers(esm_df):
extracted_ers["shift"] = time_before_questionnaire extracted_ers["shift"] = time_before_questionnaire
extracted_ers["shift"] = extracted_ers["shift"].apply(lambda x: format_timestamp(x)) extracted_ers["shift"] = extracted_ers["shift"].apply(lambda x: format_timestamp(x))
elif targets_method == "90_before": elif segmenting_method == "90_before":
"""The method 90-minutes before has an important condition. If the time between the current and the previous questionnaire is """The method 90-minutes before has an important condition. If the time between the current and the previous questionnaire is
longer than 90 minutes it takes 90 minutes, otherwise it takes the original time difference between the questionnaires. longer than 90 minutes it takes 90 minutes, otherwise it takes the original time difference between the questionnaires.
""" """
@ -103,7 +104,7 @@ def extract_ers(esm_df):
extracted_ers["length"] = (extracted_ers["timestamp"] + extracted_ers["diffs"]).apply(lambda x: format_timestamp(x)) extracted_ers["length"] = (extracted_ers["timestamp"] + extracted_ers["diffs"]).apply(lambda x: format_timestamp(x))
extracted_ers["shift"] = extracted_ers["diffs"].apply(lambda x: format_timestamp(x)) extracted_ers["shift"] = extracted_ers["diffs"].apply(lambda x: format_timestamp(x))
elif targets_method == "stress_event": elif segmenting_method == "stress_event":
"""This is a special case of the method as it consists of two important parts: """This is a special case of the method as it consists of two important parts:
(1) Generating of the ERS file (same as the methods above) and (1) Generating of the ERS file (same as the methods above) and
(2) Generating targets file alongside with the correct time segment labels. (2) Generating targets file alongside with the correct time segment labels.
@ -156,7 +157,7 @@ def extract_ers(esm_df):
# Exclude events that are longer than 2.5 hours # Exclude events that are longer than 2.5 hours
extracted_ers = extracted_ers[extracted_ers["se_duration"] <= 2.5 * 60 * 60].reset_index(drop=True) extracted_ers = extracted_ers[extracted_ers["se_duration"] <= 2.5 * 60 * 60].reset_index(drop=True)
extracted_ers["label"] = f"straw_event_{targets_method}_" + snakemake.params["pid"] + "_" + extracted_ers.index.astype(str).str.zfill(3) extracted_ers["label"] = f"straw_event_{segmenting_method}_" + snakemake.params["pid"] + "_" + extracted_ers.index.astype(str).str.zfill(3)
extracted_ers['shift'] = format_timestamp(time_before_event) extracted_ers['shift'] = format_timestamp(time_before_event)
extracted_ers['length'] = extracted_ers['se_duration'].apply(lambda x: format_timestamp(x)) extracted_ers['length'] = extracted_ers['se_duration'].apply(lambda x: format_timestamp(x))
@ -172,8 +173,8 @@ def extract_ers(esm_df):
""" """
Here the code is executed - this .py file is used both for extraction of the STRAW time_segments file for the individual Here the code is executed - this .py file is used both for extraction of the STRAW time_segments file for the individual
participant, and also for merging all participant's files into one combined file which is later used for assignments of the participant, and also for merging all participant's files into one combined file which is later used for the time segments
time segments to all sensors. to all sensors assignment.
There are two files involved (see rules extract_event_information_from_esm and merge_event_related_segments_files in preprocessing.smk) There are two files involved (see rules extract_event_information_from_esm and merge_event_related_segments_files in preprocessing.smk)
(1) ERS file which contains all the information about the time segment timings and (1) ERS file which contains all the information about the time segment timings and