From a665aedbe88c94d0da30eca4714c42d88ebc99c5 Mon Sep 17 00:00:00 2001 From: JulioV Date: Mon, 14 Sep 2020 16:19:42 -0400 Subject: [PATCH] Optimise assign day segment --- src/data/assign_to_day_segment.R | 96 +++++++++++++------------------ src/data/process_location_types.R | 2 +- src/data/readable_datetime.R | 2 +- 3 files changed, 43 insertions(+), 57 deletions(-) diff --git a/src/data/assign_to_day_segment.R b/src/data/assign_to_day_segment.R index 5e4fe97a..b12bd58a 100644 --- a/src/data/assign_to_day_segment.R +++ b/src/data/assign_to_day_segment.R @@ -1,30 +1,43 @@ library("tidyverse") library("lubridate") +find_segments_frequency <- function(local_date, local_time_obj, segments){ + return(paste(segments %>% + mutate(in_segment = local_time_obj >= segment_start & local_time_obj <= segment_end) %>% + filter(in_segment == TRUE) %>% + mutate(segment_id = paste0("[", + label, "#", + local_date, "#", + paste(str_pad(hour(segment_start),2, pad="0"), str_pad(minute(segment_start),2, pad="0"), str_pad(second(segment_start),2, pad="0"),sep =":"), "#", + local_date, "#", + paste(str_pad(hour(segment_end),2, pad="0"), str_pad(minute(segment_end),2, pad="0"), str_pad(second(segment_end),2, pad="0"),sep =":"), + "]")) %>% + pull(segment_id), collapse = "|")) +} + +find_segments_periodic <- function(date_time, segments){ + return(paste(segments[[1]] %>% + select(segment_interval, segment_id) %>% + mutate(in_segment = date_time %within% segment_interval) %>% + filter(in_segment == TRUE) %>% + pull(segment_id), collapse = "|")) +} + +find_segments_event <- function(timestamp, segments){ + return(paste(segments %>% + mutate(in_segment = timestamp >= segment_start & timestamp <= segment_end) %>% + filter(in_segment == TRUE) %>% + pull(segment_id), collapse = "|")) +} + assign_to_day_segment <- function(sensor_data, day_segments, day_segments_type){ if(day_segments_type == "FREQUENCY"){ #FREQUENCY - sensor_data <- sensor_data %>% mutate(local_date_time_obj = lubridate::parse_date_time(local_time, orders = c("HMS", "HM"))) - day_segments <- day_segments %>% mutate(start_time = lubridate::parse_date_time(start_time, orders = c("HMS", "HM")), - end_time = start_time + minutes(length)) - # Create a new column for each day_segment - for(row_id in 1:nrow(day_segments)){ - row = day_segments[row_id,] - sensor_data <- sensor_data %>% mutate(!!paste("local_day_segment", row_id, sep = "_") := ifelse(local_date_time_obj >= row$start_time & local_date_time_obj < row$end_time, - paste0("[", - row$label, "#", - local_date, "#", - paste(str_pad(hour(row$start_time),2, pad="0"), str_pad(minute(row$start_time),2, pad="0"), str_pad(second(row$start_time),2, pad="0"),sep =":"), "#", - local_date, "#", - paste(str_pad(hour(row$end_time),2, pad="0"), str_pad(minute(row$end_time),2, pad="0"), str_pad(second(row$end_time),2, pad="0"),sep =":"), - "]"), NA)) - } - # Join all day_segments in a single column - sensor_data <- sensor_data %>% - unite("assigned_segments", starts_with("local_day_segment"), sep = "|", na.rm = TRUE) %>% - select(-local_date_time_obj) - + day_segments <- day_segments %>% mutate(segment_start = lubridate::parse_date_time(start_time, orders = c("HMS", "HM")), + segment_end = segment_start + minutes(length)) + sensor_data <- sensor_data %>% mutate(local_time_obj = lubridate::parse_date_time(local_time, orders = c("HMS", "HM")), + assigned_segments = map2_chr(local_date, local_time_obj, ~find_segments_frequency(.x, .y, day_segments))) %>% select(-local_time_obj) } else if (day_segments_type == "PERIODIC"){ #PERIODIC @@ -65,22 +78,14 @@ assign_to_day_segment <- function(sensor_data, day_segments, day_segments_type){ "]")) %>% select(segment_interval, label, segment_id)), # loop thorugh every day segment and assigned it to the rows that fall within its start and end - data = map2(data, inferred_day_segments, function(nested_data, segments){ - nested_data <- nested_data %>% mutate(assigned_segments = NA_character_, row_date_time = lubridate::ymd_hms(local_date_time, tz = local_timezone)) - for(row_id in 1:nrow(segments)){ - row = segments[row_id,] - nested_data <- nested_data %>% - mutate(assigned_segments_temp = if_else(row_date_time %within% row$segment_interval, row$segment_id, NA_character_)) %>% - unite(col = "assigned_segments", c(assigned_segments, assigned_segments_temp), na.rm = TRUE, sep = "") %>% - mutate(assigned_segments = str_replace(assigned_segments, pattern = "\\]\\[", replacement = "\\]\\|\\[")) # this replaces ][ with ]|[ - } - - return(nested_data %>% select(-row_date_time)) - }) + data = map2(data, inferred_day_segments, ~ .x %>% mutate(row_date_time = lubridate::ymd_hms(local_date_time, tz = local_timezone), + assigned_segments = map_chr(row_date_time, ~find_segments_periodic(.x, inferred_day_segments))) %>% + select(-row_date_time)) ) %>% + select(-existent_dates, -inferred_day_segments) %>% unnest(cols = data) %>% arrange(row_n) %>% - select(-row_n, -existent_dates, -inferred_day_segments) + select(-row_n) } else if ( day_segments_type == "EVENT"){ @@ -106,27 +111,8 @@ assign_to_day_segment <- function(sensor_data, day_segments, day_segments_type){ "]")) %>% select(-segment_start_datetime, -segment_end_datetime) - - sensor_data <- sensor_data %>% - mutate(row_n = row_number()) %>% - group_by(local_timezone) %>% - nest() %>% - mutate(data = map(data, function(nested_data){ - nested_data <- nested_data %>% mutate(assigned_segments = NA_character_) - for(row_id in 1:nrow(day_segments)){ - row = day_segments[row_id,] - nested_data <- nested_data %>% - mutate(assigned_segments_temp = if_else(timestamp >= row$segment_start & timestamp <= row$segment_end, row$segment_id, NA_character_)) %>% - unite(col = "assigned_segments", c(assigned_segments, assigned_segments_temp), na.rm = TRUE, sep = "") %>% - mutate(assigned_segments = str_replace(assigned_segments, pattern = "\\]\\[", replacement = "\\]\\|\\[")) #replace ][ with ]|[ - } - - return(nested_data) - })) %>% - unnest(cols = data) %>% - arrange(row_n) %>% - select(-row_n) - - + sensor_data <- sensor_data %>% mutate(assigned_segments = map_chr(timestamp, ~find_segments_event(.x, day_segments))) } + + return(sensor_data) } \ No newline at end of file diff --git a/src/data/process_location_types.R b/src/data/process_location_types.R index 2165be51..8f425f70 100644 --- a/src/data/process_location_types.R +++ b/src/data/process_location_types.R @@ -68,7 +68,7 @@ if(locations_to_use == "ALL"){ filter(n == 1 | (n > 1 & provider == "fused")) %>% select(-n) %>% ungroup() - processed_locations <- assign_to_day_segment(resampled_locations, day_segments, day_segments_type, timezone) + processed_locations <- assign_to_day_segment(resampled_locations, day_segments, day_segments_type) } else { processed_locations <- locations } diff --git a/src/data/readable_datetime.R b/src/data/readable_datetime.R index c6a8d6cb..490a6927 100644 --- a/src/data/readable_datetime.R +++ b/src/data/readable_datetime.R @@ -43,6 +43,6 @@ if(!is.null(timezone_periods)){ local_timezone = fixed_timezone, local_date_time = format(utc_date_time, tz = fixed_timezone, "%Y-%m-%d %H:%M:%S")) output <- split_local_date_time(output, day_segments) - output <- assign_to_day_segment(output, day_segments, day_segments_type, fixed_timezone) + output <- assign_to_day_segment(output, day_segments, day_segments_type) write_csv(output, sensor_output) }