Rename target_ to segmenting_ method.
parent
a543ce372f
commit
bd41f42a5d
|
@ -26,7 +26,7 @@ TIME_SEGMENTS: &time_segments
|
||||||
INCLUDE_PAST_PERIODIC_SEGMENTS: TRUE # Only relevant if TYPE=PERIODIC, see docs
|
INCLUDE_PAST_PERIODIC_SEGMENTS: TRUE # Only relevant if TYPE=PERIODIC, see docs
|
||||||
TAILORED_EVENTS: # Only relevant if TYPE=EVENT
|
TAILORED_EVENTS: # Only relevant if TYPE=EVENT
|
||||||
COMPUTE: True
|
COMPUTE: True
|
||||||
TARGETS_METHOD: "stress_event" # 30_before, 90_before, stress_event
|
SEGMENTING_METHOD: "stress_event" # 30_before, 90_before, stress_event
|
||||||
|
|
||||||
# See https://www.rapids.science/latest/setup/configuration/#timezone-of-your-study
|
# See https://www.rapids.science/latest/setup/configuration/#timezone-of-your-study
|
||||||
TIMEZONE:
|
TIMEZONE:
|
||||||
|
|
|
@ -35,11 +35,11 @@ def format_timestamp(x):
|
||||||
def extract_ers(esm_df):
|
def extract_ers(esm_df):
|
||||||
"""This method has two major functionalities:
|
"""This method has two major functionalities:
|
||||||
(1) It prepares STRAW event-related segments file with the use of esm file. The execution protocol depends on
|
(1) It prepares STRAW event-related segments file with the use of esm file. The execution protocol depends on
|
||||||
the targets method specified in the config.yaml file.
|
the segmenting method specified in the config.yaml file.
|
||||||
(2) It prepares and writes csv with targets and corresponding time segments labels. This is later used
|
(2) It prepares and writes csv with targets and corresponding time segments labels. This is later used
|
||||||
in the overall cleaning script (straw).
|
in the overall cleaning script (straw).
|
||||||
|
|
||||||
Details about each target method are listed below by each corresponding condition. Refer to the RAPIDS documentation for the
|
Details about each segmenting method are listed below by each corresponding condition. Refer to the RAPIDS documentation for the
|
||||||
ERS file format: https://www.rapids.science/1.9/setup/configuration/#time-segments -> event segments
|
ERS file format: https://www.rapids.science/1.9/setup/configuration/#time-segments -> event segments
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
@ -64,20 +64,21 @@ def extract_ers(esm_df):
|
||||||
esm_filtered_sessions = classified[classified["session_response"] == 'ema_completed'].reset_index()[['device_id', 'esm_session']]
|
esm_filtered_sessions = classified[classified["session_response"] == 'ema_completed'].reset_index()[['device_id', 'esm_session']]
|
||||||
esm_df = esm_preprocessed.loc[(esm_preprocessed['device_id'].isin(esm_filtered_sessions['device_id'])) & (esm_preprocessed['esm_session'].isin(esm_filtered_sessions['esm_session']))]
|
esm_df = esm_preprocessed.loc[(esm_preprocessed['device_id'].isin(esm_filtered_sessions['device_id'])) & (esm_preprocessed['esm_session'].isin(esm_filtered_sessions['esm_session']))]
|
||||||
|
|
||||||
targets_method = config["TIME_SEGMENTS"]["TAILORED_EVENTS"]["TARGETS_METHOD"]
|
segmenting_method = config["TIME_SEGMENTS"]["TAILORED_EVENTS"]["SEGMENTING_METHOD"]
|
||||||
if targets_method in ["30_before", "90_before"]: # takes 30-minute peroid before the questionnaire + the duration of the questionnaire
|
|
||||||
|
if segmenting_method in ["30_before", "90_before"]: # takes 30-minute peroid before the questionnaire + the duration of the questionnaire
|
||||||
""" '30-minutes and 90-minutes before' have the same fundamental logic with a couple of deviations that are explained below.
|
""" '30-minutes and 90-minutes before' have the same fundamental logic with a couple of deviations that are explained below.
|
||||||
Both take x-minute period before the questionnaire that is summed with the questionnaire duration.
|
Both take x-minute period before the questionnaire that is summed with the questionnaire duration.
|
||||||
All questionnaire durations over 15 minutes are excluded from the querying.
|
All questionnaire durations over 15 minutes are excluded from the querying.
|
||||||
"""
|
"""
|
||||||
# Extract time-relevant information
|
# Extract time-relevant information
|
||||||
extracted_ers = esm_df.groupby(["device_id", "esm_session"])['timestamp'].apply(lambda x: math.ceil((x.max() - x.min()) / 1000)).reset_index() # questionnaire length
|
extracted_ers = esm_df.groupby(["device_id", "esm_session"])['timestamp'].apply(lambda x: math.ceil((x.max() - x.min()) / 1000)).reset_index() # questionnaire length
|
||||||
extracted_ers["label"] = f"straw_event_{targets_method}_" + snakemake.params["pid"] + "_" + extracted_ers.index.astype(str).str.zfill(3)
|
extracted_ers["label"] = f"straw_event_{segmenting_method}_" + snakemake.params["pid"] + "_" + extracted_ers.index.astype(str).str.zfill(3)
|
||||||
extracted_ers[['event_timestamp', 'device_id']] = esm_df.groupby(["device_id", "esm_session"])['timestamp'].min().reset_index()[['timestamp', 'device_id']]
|
extracted_ers[['event_timestamp', 'device_id']] = esm_df.groupby(["device_id", "esm_session"])['timestamp'].min().reset_index()[['timestamp', 'device_id']]
|
||||||
extracted_ers = extracted_ers[extracted_ers["timestamp"] <= 15 * 60].reset_index(drop=True) # ensure that the longest duration of the questionnaire anwsering is 15 min
|
extracted_ers = extracted_ers[extracted_ers["timestamp"] <= 15 * 60].reset_index(drop=True) # ensure that the longest duration of the questionnaire anwsering is 15 min
|
||||||
extracted_ers["shift_direction"] = -1
|
extracted_ers["shift_direction"] = -1
|
||||||
|
|
||||||
if targets_method == "30_before":
|
if segmenting_method == "30_before":
|
||||||
"""The method 30-minutes before simply takes 30 minutes before the questionnaire and sums it with the questionnaire duration.
|
"""The method 30-minutes before simply takes 30 minutes before the questionnaire and sums it with the questionnaire duration.
|
||||||
The timestamps are formatted with the help of format_timestamp() method.
|
The timestamps are formatted with the help of format_timestamp() method.
|
||||||
"""
|
"""
|
||||||
|
@ -87,7 +88,7 @@ def extract_ers(esm_df):
|
||||||
extracted_ers["shift"] = time_before_questionnaire
|
extracted_ers["shift"] = time_before_questionnaire
|
||||||
extracted_ers["shift"] = extracted_ers["shift"].apply(lambda x: format_timestamp(x))
|
extracted_ers["shift"] = extracted_ers["shift"].apply(lambda x: format_timestamp(x))
|
||||||
|
|
||||||
elif targets_method == "90_before":
|
elif segmenting_method == "90_before":
|
||||||
"""The method 90-minutes before has an important condition. If the time between the current and the previous questionnaire is
|
"""The method 90-minutes before has an important condition. If the time between the current and the previous questionnaire is
|
||||||
longer than 90 minutes, it takes 90 minutes; otherwise it takes the original time difference between the questionnaires.
|
longer than 90 minutes, it takes 90 minutes; otherwise it takes the original time difference between the questionnaires.
|
||||||
"""
|
"""
|
||||||
|
@ -103,7 +104,7 @@ def extract_ers(esm_df):
|
||||||
extracted_ers["length"] = (extracted_ers["timestamp"] + extracted_ers["diffs"]).apply(lambda x: format_timestamp(x))
|
extracted_ers["length"] = (extracted_ers["timestamp"] + extracted_ers["diffs"]).apply(lambda x: format_timestamp(x))
|
||||||
extracted_ers["shift"] = extracted_ers["diffs"].apply(lambda x: format_timestamp(x))
|
extracted_ers["shift"] = extracted_ers["diffs"].apply(lambda x: format_timestamp(x))
|
||||||
|
|
||||||
elif targets_method == "stress_event":
|
elif segmenting_method == "stress_event":
|
||||||
"""This is a special case of the method as it consists of two important parts:
|
"""This is a special case of the method as it consists of two important parts:
|
||||||
(1) Generating the ERS file (same as the methods above) and
|
(1) Generating the ERS file (same as the methods above) and
|
||||||
(2) Generating the targets file along with the correct time segment labels.
|
(2) Generating the targets file along with the correct time segment labels.
|
||||||
|
@ -156,7 +157,7 @@ def extract_ers(esm_df):
|
||||||
# Exclude events that are longer than 2.5 hours
|
# Exclude events that are longer than 2.5 hours
|
||||||
extracted_ers = extracted_ers[extracted_ers["se_duration"] <= 2.5 * 60 * 60].reset_index(drop=True)
|
extracted_ers = extracted_ers[extracted_ers["se_duration"] <= 2.5 * 60 * 60].reset_index(drop=True)
|
||||||
|
|
||||||
extracted_ers["label"] = f"straw_event_{targets_method}_" + snakemake.params["pid"] + "_" + extracted_ers.index.astype(str).str.zfill(3)
|
extracted_ers["label"] = f"straw_event_{segmenting_method}_" + snakemake.params["pid"] + "_" + extracted_ers.index.astype(str).str.zfill(3)
|
||||||
extracted_ers['shift'] = format_timestamp(time_before_event)
|
extracted_ers['shift'] = format_timestamp(time_before_event)
|
||||||
extracted_ers['length'] = extracted_ers['se_duration'].apply(lambda x: format_timestamp(x))
|
extracted_ers['length'] = extracted_ers['se_duration'].apply(lambda x: format_timestamp(x))
|
||||||
|
|
||||||
|
@ -172,8 +173,8 @@ def extract_ers(esm_df):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Here the code is executed - this .py file is used both for extraction of the STRAW time_segments file for the individual
|
Here the code is executed - this .py file is used both for extraction of the STRAW time_segments file for the individual
|
||||||
participant, and also for merging all participant's files into one combined file which is later used for assignments of the
|
participant, and also for merging all participants' files into one combined file, which is later used to assign the time segments
|
||||||
time segments to all sensors.
|
to all sensors.
|
||||||
|
|
||||||
There are two files involved (see rules extract_event_information_from_esm and merge_event_related_segments_files in preprocessing.smk)
|
There are two files involved (see rules extract_event_information_from_esm and merge_event_related_segments_files in preprocessing.smk)
|
||||||
(1) ERS file which contains all the information about the time segment timings and
|
(1) ERS file which contains all the information about the time segment timings and
|
||||||
|
|
Loading…
Reference in New Issue