Fix datetime labels of event segments across multiple tzs
parent
8909876cff
commit
1025e6d9d8
|
@ -271,6 +271,19 @@ Time segments (or epochs) are the time windows on which you want to extract beha
|
|||
|
||||
The three `mood` segments are 1 hour, 1 day and 7 days long and have no shift. In addition, these `mood` segments are grouped together. This means that although RAPIDS will compute features on each one of them, some of the information needed to compute a few of those features will be extracted from all three segments; for example, the phone contact that called a participant the most, or the location clusters visited by a participant.
|
||||
|
||||
??? info "Date time labels of event segments"
|
||||
In the final feature file, you will find a row per event segment. The `local_segment` column of each row has a `label`, a start date-time string, and an end date-time string.
|
||||
|
||||
```bash
|
||||
weeklysurvey2060#2020-09-12 01:00:00,2020-09-18 23:59:59
|
||||
```
|
||||
|
||||
Sensor data is always segmented based on timestamps; the date-time strings are attached for informative purposes only. For example, you can plot your features based on these strings.
|
||||
|
||||
When you configure RAPIDS to work with a single time zone, that time zone code is used to convert the start/end timestamps (the ones you typed in the event segments file) into start/end date-time strings. However, when you configure RAPIDS to work with multiple time zones, RAPIDS uses the most common time zone across all devices of each participant to do the conversion. The most common time zone is the one in which a participant spent the most time.
|
||||
|
||||
In practical terms, this means that event segments that happened in uncommon time zones will have shifted start/end date-time labels. However, the data within each segment is still correctly filtered based on timestamps.
|
||||
|
||||
### Segment Examples
|
||||
|
||||
=== "5-minutes"
|
||||
|
|
|
@ -29,14 +29,9 @@ infer_event_segments <- function(tz, segments){
|
|||
return(inferred)
|
||||
}
|
||||
|
||||
assign_to_event_segments <- function(sensor_data, time_segments){
|
||||
assign_to_event_segments <- function(sensor_data, time_segments, most_common_tz){
|
||||
inferred_time_segments <- infer_event_segments(most_common_tz, time_segments)
|
||||
sensor_data <- sensor_data %>%
|
||||
group_by(local_timezone) %>%
|
||||
nest() %>%
|
||||
mutate(inferred_time_segments = map(local_timezone, infer_event_segments, time_segments),
|
||||
data = map2(data, inferred_time_segments, assign_rows_to_segments)) %>%
|
||||
select(-inferred_time_segments) %>%
|
||||
unnest(data) %>%
|
||||
arrange(timestamp) %>%
|
||||
ungroup()
|
||||
assign_rows_to_segments(inferred_time_segments) %>%
|
||||
arrange(timestamp)
|
||||
}
|
|
@ -79,6 +79,31 @@ infer_tz_codes_from_phones <- function(data_device_ids, tz_codes, participant_fi
|
|||
data_tz_codes
|
||||
}
|
||||
|
||||
# Gather the values stored under every DEVICE_IDS attribute of a participant.
#
# participant_data: a list whose elements are inspected by name; the caller in
#   this file passes the parsed participant YAML file.
# Returns the concatenation of all DEVICE_IDS values in the order the elements
#   appear, or NULL when no element has a DEVICE_IDS attribute.
get_devices_ids <- function(participant_data){
  collected_ids <- c()
  for(section in participant_data){
    # One append per attribute literally named DEVICE_IDS; elements without
    # names contribute nothing because the comparison is empty.
    n_matches <- sum(names(section) == "DEVICE_IDS")
    for(i in seq_len(n_matches))
      collected_ids <- c(collected_ids, section[["DEVICE_IDS"]])
  }
  return(collected_ids)
}
|
||||
|
||||
# Find the time zone code in which a participant spent the most time.
#
# tz_codes_file: path to the CSV file with time zone codes; it is filtered by
#   its device_id column.
# participant_file: path to the participant YAML file; its DEVICE_IDS entries
#   select the rows of tz_codes_file that belong to this participant.
# Returns a single tz code, or "UTC" when no tz interval can be selected for
#   the participant (e.g. none of their devices appear in tz_codes_file).
get_participant_most_common_tz <- function(tz_codes_file, participant_file){
  tz_codes <- read.csv(tz_codes_file)
  participant_device_ids <- get_devices_ids(read_yaml(participant_file))

  participant_tz_codes <- tz_codes %>% filter(device_id %in% participant_device_ids)

  # Add up the time spent in each tz code before picking the maximum. Taking
  # the single longest interval would misreport a participant who was in the
  # same time zone during several separate intervals.
  most_common_tz <- buils_tz_intervals(participant_tz_codes, "all") %>%
    mutate(duration = end_timestamp - timestamp) %>%
    group_by(tzcode) %>%
    summarise(duration = sum(duration)) %>%
    ungroup() %>%
    filter(duration == max(duration)) %>%
    head(1) %>%  # keep one code if several are tied
    pull(tzcode)

  # Fall back to UTC when nothing was selected
  if(length(most_common_tz) == 0)
    most_common_tz <- "UTC"

  return(most_common_tz)
}
|
||||
|
||||
# TODO include CSV timezone file in rule
|
||||
multiple_time_zone_assignment <- function(sensor_data, timezone_parameters, device_type, pid, participant_file){
|
||||
if(nrow(sensor_data) == 0)
|
||||
|
|
|
@ -16,7 +16,7 @@ assign_rows_to_segments <- function(data, segments){
|
|||
data
|
||||
}
|
||||
|
||||
assign_to_time_segment <- function(sensor_data, time_segments, time_segments_type, include_past_periodic_segments){
|
||||
assign_to_time_segment <- function(sensor_data, time_segments, time_segments_type, include_past_periodic_segments, most_common_tz){
|
||||
|
||||
if(nrow(sensor_data) == 0 || nrow(time_segments) == 0)
|
||||
return(sensor_data %>% mutate(assigned_segments = NA))
|
||||
|
@ -28,7 +28,7 @@ assign_to_time_segment <- function(sensor_data, time_segments, time_segments_typ
|
|||
|
||||
} else if ( time_segments_type == "EVENT"){
|
||||
source("src/data/datetime/assign_to_event_segments.R")
|
||||
sensor_data <- assign_to_event_segments(sensor_data, time_segments)
|
||||
sensor_data <- assign_to_event_segments(sensor_data, time_segments, most_common_tz)
|
||||
return(sensor_data)
|
||||
}
|
||||
}
|
|
@ -111,14 +111,18 @@ readable_datetime <- function(){
|
|||
|
||||
validate_user_timezones(timezone_parameters)
|
||||
|
||||
if(timezone_parameters$TYPE == "SINGLE")
|
||||
if(timezone_parameters$TYPE == "SINGLE"){
|
||||
output <- input %>% mutate(local_timezone = timezone_parameters$SINGLE$TZCODE)
|
||||
else if(timezone_parameters$TYPE == "MULTIPLE")
|
||||
most_common_tz <- timezone_parameters$SINGLE$TZCODE
|
||||
}
|
||||
else if(timezone_parameters$TYPE == "MULTIPLE"){
|
||||
output <- multiple_time_zone_assignment(input, timezone_parameters, device_type, pid, participant_file)
|
||||
most_common_tz <- get_participant_most_common_tz(timezone_parameters$MULTIPLE$TZCODES_FILE, participant_file) # in assign_to_multiple_timezones.R
|
||||
}
|
||||
|
||||
output <- create_mising_temporal_column(output, device_type)
|
||||
output <- split_local_date_time(output)
|
||||
output <- assign_to_time_segment(output, time_segments, time_segments_type, include_past_periodic_segments)
|
||||
output <- assign_to_time_segment(output, time_segments, time_segments_type, include_past_periodic_segments, most_common_tz)
|
||||
output <- filter_wanted_dates(output, participant_file, device_type)
|
||||
output <- output %>% arrange(timestamp)
|
||||
|
||||
|
|
Loading…
Reference in New Issue