Add check for non-overlapping event day segments
parent
86509207ac
commit
1d04aa6807
|
@ -1,9 +1,9 @@
|
|||
label,event_timestamp,length,shift,shift_direction,pid
|
||||
stress,1587661220000,1hours,0minutes,1,test01
|
||||
stress,1587747620000,4hours,4hours,-1,test01
|
||||
stress,1587906020000,3hours,0minutes,1,test01
|
||||
stress,1588003220000,7hours,4hours,-1,test01
|
||||
stress,1588172420000,9hours,0,-1,test01
|
||||
mood,1587661220000,7days,0,0,p02
|
||||
mood,1587747620000,7days,0,0,p02
|
||||
mood,1587906020000,7days,0,0,p02
|
||||
label,event_timestamp,length,shift,shift_direction,device_id
|
||||
stress,1587661220000,1hours,0minutes,1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
|
||||
stress,1587747620000,4hours,4hours,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
|
||||
stress,1587906020000,3hours,0minutes,1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
|
||||
stress,1588003220000,7hours,4hours,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
|
||||
stress,1588172420000,9hours,0,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
|
||||
mood,1587661220000,1hour,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
|
||||
mood,1587747620000,1days,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
|
||||
mood,1587906020000,7days,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
|
||||
|
|
|
|
@ -53,7 +53,8 @@ rule download_fitbit_data:
|
|||
|
||||
rule compute_day_segments:
|
||||
input:
|
||||
config["DAY_SEGMENTS"]["FILE"]
|
||||
config["DAY_SEGMENTS"]["FILE"],
|
||||
"data/external/participant_files/{pid}.yaml"
|
||||
params:
|
||||
day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
|
||||
pid = "{pid}"
|
||||
|
|
|
@ -138,13 +138,14 @@ assign_to_day_segment <- function(sensor_data, day_segments, day_segments_type,
|
|||
sensor_data <- sensor_data %>%
|
||||
group_by(local_timezone) %>%
|
||||
nest() %>%
|
||||
mutate(inferred_day_segments = map(local_timezone, ~ day_segments %>%
|
||||
mutate(inferred_day_segments = map(local_timezone, function(tz){
|
||||
inferred <- day_segments %>%
|
||||
mutate(shift = ifelse(shift == "0", "0seconds", shift),
|
||||
segment_start_ts = event_timestamp + (as.integer(seconds(lubridate::duration(shift))) * ifelse(shift_direction >= 0, 1, -1) * 1000),
|
||||
segment_end_ts = segment_start_ts + (as.integer(seconds(lubridate::duration(length))) * 1000),
|
||||
# these start and end datetime objects are for labeling only
|
||||
segment_id_start = lubridate::as_datetime(segment_start_ts/1000, tz = .x),
|
||||
segment_id_end = lubridate::as_datetime(segment_end_ts/1000, tz = .x),
|
||||
segment_id_start = lubridate::as_datetime(segment_start_ts/1000, tz = tz),
|
||||
segment_id_end = lubridate::as_datetime(segment_end_ts/1000, tz = tz),
|
||||
segment_end_ts = segment_end_ts + 999,
|
||||
segment_id = paste0("[",
|
||||
paste0(label,"#",
|
||||
|
@ -153,7 +154,17 @@ assign_to_day_segment <- function(sensor_data, day_segments, day_segments_type,
|
|||
lubridate::date(segment_id_end), " ",
|
||||
paste(str_pad(hour(segment_id_end),2, pad="0"), str_pad(minute(segment_id_end),2, pad="0"), str_pad(second(segment_id_end),2, pad="0"),sep =":")),";",
|
||||
paste0(segment_start_ts, ",", segment_end_ts)),
|
||||
"]"))),
|
||||
"]"))
|
||||
# Check for overlapping segments (not allowed because our resampling episode algorithm would need second-level instead of minute-level granularity, which would increase storage and computation time)
|
||||
overlapping <- inferred %>% group_by(label) %>% arrange(segment_start_ts) %>%
|
||||
mutate(overlaps = if_else(segment_start_ts <= lag(segment_end_ts), TRUE, FALSE),
|
||||
overlapping_segments = paste(paste(lag(label), lag(event_timestamp), lag(length), lag(shift), lag(shift_direction), lag(device_id), sep = ","),"and",
|
||||
paste(label, event_timestamp, length, shift, shift_direction, device_id, sep = ",")))
|
||||
if(any(overlapping$overlaps, na.rm = TRUE)){
|
||||
stop(paste0("\n\nOne or more event day segments overlap for ",overlapping$device_id[[1]],", modify their lengths so they don't:\n", paste0(overlapping %>% filter(overlaps == TRUE) %>% pull(overlapping_segments), collapse = "\n"), "\n\n"))
|
||||
} else{
|
||||
return(inferred)
|
||||
}}),
|
||||
data = map2(data, inferred_day_segments, assign_rows_to_segments)) %>%
|
||||
select(-inferred_day_segments) %>%
|
||||
unnest(data) %>%
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import pandas as pd
|
||||
import warnings
|
||||
import yaml
|
||||
|
||||
def is_valid_frequency_segments(day_segments, day_segments_file):
|
||||
"""
|
||||
|
@ -107,9 +108,9 @@ def is_valid_periodic_segments(day_segments, day_segments_file):
|
|||
def is_valid_event_segments(day_segments, day_segments_file):
|
||||
day_segments = day_segments.copy(deep=True)
|
||||
|
||||
valid_columns = ["label", "event_timestamp", "length", "shift", "shift_direction", "pid"]
|
||||
valid_columns = ["label", "event_timestamp", "length", "shift", "shift_direction", "device_id"]
|
||||
if len(list(set(day_segments.columns) - set(valid_columns))) > 0:
|
||||
error_message = 'The EVENT day segments file in [DAY_SEGMENTS][FILE] must have six columns: label, event_timestamp, length, shift, shift_direction and pid ' \
|
||||
error_message = 'The EVENT day segments file in [DAY_SEGMENTS][FILE] must have six columns: label, event_timestamp, length, shift, shift_direction and device_id ' \
|
||||
'but instead we found {}. Modify {}'.format(list(day_segments.columns), day_segments_file)
|
||||
raise ValueError(error_message)
|
||||
|
||||
|
@ -167,10 +168,10 @@ def parse_periodic_segments(day_segments):
|
|||
day_segments.loc[day_segments["repeats_on"] == "every_day", "repeats_value"] = 0
|
||||
return day_segments
|
||||
|
||||
def parse_event_segments(day_segments, pid):
|
||||
return day_segments.query("pid == @pid")
|
||||
def parse_event_segments(day_segments, device_id):
|
||||
return day_segments.query("device_id == @device_id")
|
||||
|
||||
def parse_day_segments(day_segments_file, segments_type, pid):
|
||||
def parse_day_segments(day_segments_file, segments_type, device_id):
|
||||
# Add code to validate and parse frequencies, intervals, and events
|
||||
# Expected formats:
|
||||
# Frequency: label, length columns (e.g. my_prefix, 5) length has to be in minutes (int)
|
||||
|
@ -195,12 +196,15 @@ def parse_day_segments(day_segments_file, segments_type, pid):
|
|||
elif(segments_type == "PERIODIC" and is_valid_periodic_segments(day_segments, day_segments_file)):
|
||||
day_segments = parse_periodic_segments(day_segments)
|
||||
elif(segments_type == "EVENT" and is_valid_event_segments(day_segments, day_segments_file)):
|
||||
day_segments = parse_event_segments(day_segments, pid)
|
||||
day_segments = parse_event_segments(day_segments, device_id)
|
||||
else:
|
||||
raise ValueError("{} does not have a format compatible with frequency, periodic or event day segments. Please refer to [LINK]".format(day_segments_file))
|
||||
return day_segments
|
||||
|
||||
final_day_segments = parse_day_segments(snakemake.input[0], snakemake.params["day_segments_type"], snakemake.params["pid"])
|
||||
participant_file = yaml.load(open(snakemake.input[1], 'r'), Loader=yaml.FullLoader)
|
||||
device_id = participant_file["PHONE"]["DEVICE_IDS"]
|
||||
device_id = device_id[len(device_id) -1 ]
|
||||
final_day_segments = parse_day_segments(snakemake.input[0], snakemake.params["day_segments_type"], device_id)
|
||||
|
||||
if snakemake.params["day_segments_type"] == "EVENT" and final_day_segments.shape[0] == 0:
|
||||
warnings.warn("There are no event day segments for {}. Check your day segment file {}".format(snakemake.params["pid"], snakemake.input[0]))
|
||||
|
|
Loading…
Reference in New Issue