Add check for non-overlapping event day segments
parent
86509207ac
commit
1d04aa6807
|
@ -1,9 +1,9 @@
|
||||||
label,event_timestamp,length,shift,shift_direction,pid
|
label,event_timestamp,length,shift,shift_direction,device_id
|
||||||
stress,1587661220000,1hours,0minutes,1,test01
|
stress,1587661220000,1hours,0minutes,1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
|
||||||
stress,1587747620000,4hours,4hours,-1,test01
|
stress,1587747620000,4hours,4hours,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
|
||||||
stress,1587906020000,3hours,0minutes,1,test01
|
stress,1587906020000,3hours,0minutes,1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
|
||||||
stress,1588003220000,7hours,4hours,-1,test01
|
stress,1588003220000,7hours,4hours,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
|
||||||
stress,1588172420000,9hours,0,-1,test01
|
stress,1588172420000,9hours,0,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
|
||||||
mood,1587661220000,7days,0,0,p02
|
mood,1587661220000,1hour,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
|
||||||
mood,1587747620000,7days,0,0,p02
|
mood,1587747620000,1days,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
|
||||||
mood,1587906020000,7days,0,0,p02
|
mood,1587906020000,7days,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
|
||||||
|
|
|
|
@ -53,7 +53,8 @@ rule download_fitbit_data:
|
||||||
|
|
||||||
rule compute_day_segments:
|
rule compute_day_segments:
|
||||||
input:
|
input:
|
||||||
config["DAY_SEGMENTS"]["FILE"]
|
config["DAY_SEGMENTS"]["FILE"],
|
||||||
|
"data/external/participant_files/{pid}.yaml"
|
||||||
params:
|
params:
|
||||||
day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
|
day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
|
||||||
pid = "{pid}"
|
pid = "{pid}"
|
||||||
|
|
|
@ -138,13 +138,14 @@ assign_to_day_segment <- function(sensor_data, day_segments, day_segments_type,
|
||||||
sensor_data <- sensor_data %>%
|
sensor_data <- sensor_data %>%
|
||||||
group_by(local_timezone) %>%
|
group_by(local_timezone) %>%
|
||||||
nest() %>%
|
nest() %>%
|
||||||
mutate(inferred_day_segments = map(local_timezone, ~ day_segments %>%
|
mutate(inferred_day_segments = map(local_timezone, function(tz){
|
||||||
|
inferred <- day_segments %>%
|
||||||
mutate(shift = ifelse(shift == "0", "0seconds", shift),
|
mutate(shift = ifelse(shift == "0", "0seconds", shift),
|
||||||
segment_start_ts = event_timestamp + (as.integer(seconds(lubridate::duration(shift))) * ifelse(shift_direction >= 0, 1, -1) * 1000),
|
segment_start_ts = event_timestamp + (as.integer(seconds(lubridate::duration(shift))) * ifelse(shift_direction >= 0, 1, -1) * 1000),
|
||||||
segment_end_ts = segment_start_ts + (as.integer(seconds(lubridate::duration(length))) * 1000),
|
segment_end_ts = segment_start_ts + (as.integer(seconds(lubridate::duration(length))) * 1000),
|
||||||
# these start and end datetime objects are for labeling only
|
# these start and end datetime objects are for labeling only
|
||||||
segment_id_start = lubridate::as_datetime(segment_start_ts/1000, tz = .x),
|
segment_id_start = lubridate::as_datetime(segment_start_ts/1000, tz = tz),
|
||||||
segment_id_end = lubridate::as_datetime(segment_end_ts/1000, tz = .x),
|
segment_id_end = lubridate::as_datetime(segment_end_ts/1000, tz = tz),
|
||||||
segment_end_ts = segment_end_ts + 999,
|
segment_end_ts = segment_end_ts + 999,
|
||||||
segment_id = paste0("[",
|
segment_id = paste0("[",
|
||||||
paste0(label,"#",
|
paste0(label,"#",
|
||||||
|
@ -153,7 +154,17 @@ assign_to_day_segment <- function(sensor_data, day_segments, day_segments_type,
|
||||||
lubridate::date(segment_id_end), " ",
|
lubridate::date(segment_id_end), " ",
|
||||||
paste(str_pad(hour(segment_id_end),2, pad="0"), str_pad(minute(segment_id_end),2, pad="0"), str_pad(second(segment_id_end),2, pad="0"),sep =":")),";",
|
paste(str_pad(hour(segment_id_end),2, pad="0"), str_pad(minute(segment_id_end),2, pad="0"), str_pad(second(segment_id_end),2, pad="0"),sep =":")),";",
|
||||||
paste0(segment_start_ts, ",", segment_end_ts)),
|
paste0(segment_start_ts, ",", segment_end_ts)),
|
||||||
"]"))),
|
"]"))
|
||||||
|
# Check for overlapping segments (not allowed because our resampling episode algorithm would need second-level instead of minute-level granularity, which would increase storage and computation time)
|
||||||
|
overlapping <- inferred %>% group_by(label) %>% arrange(segment_start_ts) %>%
|
||||||
|
mutate(overlaps = if_else(segment_start_ts <= lag(segment_end_ts), TRUE, FALSE),
|
||||||
|
overlapping_segments = paste(paste(lag(label), lag(event_timestamp), lag(length), lag(shift), lag(shift_direction), lag(device_id), sep = ","),"and",
|
||||||
|
paste(label, event_timestamp, length, shift, shift_direction, device_id, sep = ",")))
|
||||||
|
if(any(overlapping$overlaps, na.rm = TRUE)){
|
||||||
|
stop(paste0("\n\nOne or more event day segments overlap for ",overlapping$device_id[[1]],", modify their lengths so they don't:\n", paste0(overlapping %>% filter(overlaps == TRUE) %>% pull(overlapping_segments), collapse = "\n"), "\n\n"))
|
||||||
|
} else{
|
||||||
|
return(inferred)
|
||||||
|
}}),
|
||||||
data = map2(data, inferred_day_segments, assign_rows_to_segments)) %>%
|
data = map2(data, inferred_day_segments, assign_rows_to_segments)) %>%
|
||||||
select(-inferred_day_segments) %>%
|
select(-inferred_day_segments) %>%
|
||||||
unnest(data) %>%
|
unnest(data) %>%
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import warnings
|
import warnings
|
||||||
|
import yaml
|
||||||
|
|
||||||
def is_valid_frequency_segments(day_segments, day_segments_file):
|
def is_valid_frequency_segments(day_segments, day_segments_file):
|
||||||
"""
|
"""
|
||||||
|
@ -107,9 +108,9 @@ def is_valid_periodic_segments(day_segments, day_segments_file):
|
||||||
def is_valid_event_segments(day_segments, day_segments_file):
|
def is_valid_event_segments(day_segments, day_segments_file):
|
||||||
day_segments = day_segments.copy(deep=True)
|
day_segments = day_segments.copy(deep=True)
|
||||||
|
|
||||||
valid_columns = ["label", "event_timestamp", "length", "shift", "shift_direction", "pid"]
|
valid_columns = ["label", "event_timestamp", "length", "shift", "shift_direction", "device_id"]
|
||||||
if len(list(set(day_segments.columns) - set(valid_columns))) > 0:
|
if len(list(set(day_segments.columns) - set(valid_columns))) > 0:
|
||||||
error_message = 'The EVENT day segments file in [DAY_SEGMENTS][FILE] must have six columns: label, event_timestamp, length, shift, shift_direction and pid ' \
|
error_message = 'The EVENT day segments file in [DAY_SEGMENTS][FILE] must have six columns: label, event_timestamp, length, shift, shift_direction and device_id ' \
|
||||||
'but instead we found {}. Modify {}'.format(list(day_segments.columns), day_segments_file)
|
'but instead we found {}. Modify {}'.format(list(day_segments.columns), day_segments_file)
|
||||||
raise ValueError(error_message)
|
raise ValueError(error_message)
|
||||||
|
|
||||||
|
@ -167,10 +168,10 @@ def parse_periodic_segments(day_segments):
|
||||||
day_segments.loc[day_segments["repeats_on"] == "every_day", "repeats_value"] = 0
|
day_segments.loc[day_segments["repeats_on"] == "every_day", "repeats_value"] = 0
|
||||||
return day_segments
|
return day_segments
|
||||||
|
|
||||||
def parse_event_segments(day_segments, pid):
|
def parse_event_segments(day_segments, device_id):
|
||||||
return day_segments.query("pid == @pid")
|
return day_segments.query("device_id == @device_id")
|
||||||
|
|
||||||
def parse_day_segments(day_segments_file, segments_type, pid):
|
def parse_day_segments(day_segments_file, segments_type, device_id):
|
||||||
# Add code to validate and parse frequencies, intervals, and events
|
# Add code to validate and parse frequencies, intervals, and events
|
||||||
# Expected formats:
|
# Expected formats:
|
||||||
# Frequency: label, length columns (e.g. my_prefix, 5) length has to be in minutes (int)
|
# Frequency: label, length columns (e.g. my_prefix, 5) length has to be in minutes (int)
|
||||||
|
@ -195,12 +196,15 @@ def parse_day_segments(day_segments_file, segments_type, pid):
|
||||||
elif(segments_type == "PERIODIC" and is_valid_periodic_segments(day_segments, day_segments_file)):
|
elif(segments_type == "PERIODIC" and is_valid_periodic_segments(day_segments, day_segments_file)):
|
||||||
day_segments = parse_periodic_segments(day_segments)
|
day_segments = parse_periodic_segments(day_segments)
|
||||||
elif(segments_type == "EVENT" and is_valid_event_segments(day_segments, day_segments_file)):
|
elif(segments_type == "EVENT" and is_valid_event_segments(day_segments, day_segments_file)):
|
||||||
day_segments = parse_event_segments(day_segments, pid)
|
day_segments = parse_event_segments(day_segments, device_id)
|
||||||
else:
|
else:
|
||||||
raise ValueError("{} does not have a format compatible with frequency, periodic or event day segments. Please refer to [LINK]".format(day_segments_file))
|
raise ValueError("{} does not have a format compatible with frequency, periodic or event day segments. Please refer to [LINK]".format(day_segments_file))
|
||||||
return day_segments
|
return day_segments
|
||||||
|
|
||||||
final_day_segments = parse_day_segments(snakemake.input[0], snakemake.params["day_segments_type"], snakemake.params["pid"])
|
participant_file = yaml.load(open(snakemake.input[1], 'r'), Loader=yaml.FullLoader)
|
||||||
|
device_id = participant_file["PHONE"]["DEVICE_IDS"]
|
||||||
|
device_id = device_id[len(device_id) -1 ]
|
||||||
|
final_day_segments = parse_day_segments(snakemake.input[0], snakemake.params["day_segments_type"], device_id)
|
||||||
|
|
||||||
if snakemake.params["day_segments_type"] == "EVENT" and final_day_segments.shape[0] == 0:
|
if snakemake.params["day_segments_type"] == "EVENT" and final_day_segments.shape[0] == 0:
|
||||||
warnings.warn("There are no event day segments for {}. Check your day segment file {}".format(snakemake.params["pid"], snakemake.input[0]))
|
warnings.warn("There are no event day segments for {}. Check your day segment file {}".format(snakemake.params["pid"], snakemake.input[0]))
|
||||||
|
|
Loading…
Reference in New Issue