Event segments' device id can be one of any assigned to a participant

2020-12-02 17:17:55 -05:00 · 2020-12-02 17:17:55 -05:00 · c24cba8679
parent 0bd43c139c
commit c24cba8679
2 changed files with 12 additions and 9 deletions
--- a/docs/setup/configuration.md
+++ b/docs/setup/configuration.md
@@ -288,12 +288,12 @@ Day segments (or epochs) are the time windows on which you want to extract behav
    | Column        | Description                                                                                                                                                                                                   |
    |---------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-    | label         | A string that is used as a prefix in the name of your day segments. If labels are unique, every segment is independent; if two or more segments have the same label, their data will be grouped when computing features like the `most frequent contact` for calls (the most frequent contact will be computed across all these segments)                                                                                                     |
+    | label         | A string that is used as a prefix in the name of your day segments. If labels are unique, every segment is independent; if two or more segments have the same label, their data will be grouped when computing auxiliary data for features like the `most frequent contact` for calls (the most frequent contact will be computed across all these segments). There cannot be two *overlapping* event segments with the same label (RAPIDS will throw an error)                                                                                                      |
    | event_timestamp    | A UNIX timestamp that represents the moment an event of interest happened (clinical relapse, survey, readmission, etc.). The corresponding day segment will be computed around this moment using `length`, `shift`, and `shift_direction`                                                                                            |
    | length        | A string representing the length of this segment. It can have one or more of the following keys `XXD XXH XXM XXS` to represent a number of days, hours, minutes, and seconds. For example `7D 23H 59M 59S`                        |
    | shift    | A string representing the time shift from `event_timestamp`. It can have one or more of the following keys `XXD XXH XXM XXS` to represent a number of days, hours, minutes and seconds. For example `7D 23H 59M 59S`. Use this value to  change the start of a segment with respect to its `event_timestamp`. For example, set this variable to `1H` to create a segment that starts 1 hour from an event of interest (`shift_direction` determines if it's before or after).                                                                        |
    | shift_direction | An integer representing whether the `shift` is before (`-1`) or after (`1`) an `event_timestamp` |
-    |device_id| The device id (smartphone or fitbit) to whom this segment belongs to. You have to create a line in the event segment file for each event of a participant that you want to analyse|
+    |device_id| The device id (smartphone or fitbit) to which this segment belongs. You have to create a line in this event segment file for each event of a participant that you want to analyse. If you have participants with multiple device ids, you can choose any of them|
    !!! example
        ```csv
--- a/src/data/compute_day_segments.py
+++ b/src/data/compute_day_segments.py
@@ -168,10 +168,10 @@ def parse_periodic_segments(day_segments):
    day_segments.loc[day_segments["repeats_on"] == "every_day", "repeats_value"] = 0
    return day_segments
-def parse_event_segments(day_segments, device_id):
+def parse_event_segments(day_segments, device_ids):
-    return day_segments.query("device_id == @device_id")
+    return day_segments.query("device_id == @device_ids")
-def parse_day_segments(day_segments_file, segments_type, device_id):
+def parse_day_segments(day_segments_file, segments_type, device_ids):
    # Add code to validate and parse frequencies, intervals, and events
    # Expected formats:
    # Frequency: label, length columns (e.g. my_prefix, 5) length has to be in minutes (int)
@@ -196,15 +196,18 @@ def parse_day_segments(day_segments_file, segments_type, device_id):
    elif(segments_type == "PERIODIC" and is_valid_periodic_segments(day_segments, day_segments_file)):
        day_segments = parse_periodic_segments(day_segments)
    elif(segments_type == "EVENT" and is_valid_event_segments(day_segments, day_segments_file)):
-        day_segments = parse_event_segments(day_segments, device_id)
+        day_segments = parse_event_segments(day_segments, device_ids)
    else:
        raise ValueError("{} does not have a format compatible with frequency, periodic or event day segments. Please refer to [LINK]".format(day_segments_file))
    return day_segments
 participant_file = yaml.load(open(snakemake.input[1], 'r'), Loader=yaml.FullLoader)
-device_id = participant_file["PHONE"]["DEVICE_IDS"]
+device_ids = []
-device_id = device_id[len(device_id) -1 ]
+for key in participant_file.keys():
-final_day_segments = parse_day_segments(snakemake.input[0], snakemake.params["day_segments_type"], device_id)
+    if "DEVICE_IDS" in participant_file[key]:
        device_ids = device_ids + participant_file[key]["DEVICE_IDS"]
 final_day_segments = parse_day_segments(snakemake.input[0], snakemake.params["day_segments_type"], device_ids)
 if snakemake.params["day_segments_type"] == "EVENT" and final_day_segments.shape[0] == 0:
    warnings.warn("There are no event day segments for {}. Check your day segment file {}".format(snakemake.params["pid"], snakemake.input[0]))