2020-08-28 19:53:00 +02:00
# stringr supplies the str_* helpers used in this file
# (str_extract, str_sub, str_to_lower).
library("stringr")

# Prefix of the progress messages printed while provider features are computed.
rapids_log_tag <- "RAPIDS:"
2020-08-28 19:53:00 +02:00
#' Keep the rows assigned to one day segment and label them with it
#'
#' `assigned_segments` is searched for an entry of the form
#' `[<day_segment>#<date>#<time>#<date>#<time>]`. Rows without an entry that
#' opens with `[<day_segment>#` are dropped.
#'
#' @param data Data frame with an `assigned_segments` character column.
#' @param day_segment Segment label to look for. It is pasted into a regular
#'   expression unescaped, so regex metacharacters in the label are
#'   interpreted as such.
#' @return The remaining rows plus a `local_segment` column holding the
#'   matched entry without its surrounding brackets (`NA` when a row passes
#'   the filter but the full entry pattern does not match).
filter_data_by_segment <- function(data, day_segment) {
  # Dates accept "-", "/" (and, because of the character class, "|") as
  # separators; times are HH:MM:SS.
  date_regex <- "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2}"
  hour_regex <- "[0-9]{2}:[0-9]{2}:[0-9]{2}"

  segment_prefix <- paste0("\\[", day_segment, "#")
  segment_regex <- paste0(
    segment_prefix,
    date_regex, "#", hour_regex, "#",
    date_regex, "#", hour_regex,
    "\\]"
  )

  data %>%
    filter(grepl(segment_prefix, assigned_segments)) %>%
    mutate(
      local_segment = str_extract(assigned_segments, segment_regex),
      # Drop the enclosing "[" and "]" of the matched entry.
      local_segment = str_sub(local_segment, 2, -2)
    )
}
2020-08-28 23:40:23 +02:00
#' Compute (or stub out) the features of one provider for every day segment
#'
#' When the provider's `COMPUTE` flag is true, the provider script
#' `src/features/<config_key>/<SRC_FOLDER>/main.R` is sourced and its
#' `<SRC_FOLDER>_features` function is called once per day segment; the
#' per-segment results are merged into one data frame. Otherwise an empty data
#' frame with one `NA` column per configured feature is produced.
#'
#' @param provider Named list with the provider configuration. Must contain
#'   `FEATURES`; `COMPUTE` is read as a flag and `SRC_FOLDER` is read when the
#'   flag is true.
#' @param provider_key Provider name as used under `PROVIDERS`; every computed
#'   feature column must contain its lower-case form.
#' @param config_key Sensor section of the configuration, also used as the
#'   folder name under `src/features/`.
#' @param sensor_data_file Path to the CSV holding the sensor data.
#' @param day_segments_file Path to the CSV of day segments; its `label` column
#'   lists the segments to process.
#' @return A data frame with `local_segment`, the five columns it is split
#'   into (`local_segment_label`, `local_start_date`, `local_start_time`,
#'   `local_end_date`, `local_end_time`) and the feature columns.
fetch_provider_features <- function(provider, provider_key, config_key, sensor_data_file, day_segments_file) {
  sensor_features <- data.frame(local_segment = character(), stringsAsFactors = FALSE)

  sensor_data <- read.csv(sensor_data_file, stringsAsFactors = FALSE)
  day_segments_labels <- read.csv(day_segments_file, stringsAsFactors = FALSE)

  if (!"FEATURES" %in% names(provider)) {
    stop(paste0(
      "Provider config[", config_key, "][PROVIDERS][", provider_key,
      "] is missing a FEATURES attribute in config.yaml"
    ))
  }

  # isTRUE() keeps the original `== TRUE` coercion but treats a missing or NA
  # COMPUTE entry as "do not compute" instead of failing inside `if`.
  if (isTRUE(provider[["COMPUTE"]] == TRUE)) {
    code_path <- paste0("src/features/", config_key, "/", provider[["SRC_FOLDER"]], "/main.R")
    # source() evaluates in the global environment by default, which is where
    # match.fun() then finds the provider's feature function.
    source(code_path)
    features_function <- match.fun(paste0(provider[["SRC_FOLDER"]], "_features"))

    # Loop-invariant pattern: feature names must contain the provider key.
    key_pattern <- paste0(".*(", str_to_lower(provider_key), ").*")

    for (day_segment in day_segments_labels %>% pull(label)) {
      print(paste(rapids_log_tag, "Processing", config_key, provider_key, day_segment))
      features <- features_function(sensor_data, day_segment, provider)

      # Feature names must embed the provider key so they stay unique once
      # several providers' features are combined.
      features_names <- colnames(features %>% select(-local_segment))
      has_key <- grepl(key_pattern, features_names)
      if (any(!has_key)) {
        stop(paste0(
          "The name of all ", config_key, " features of ", provider_key,
          " must contain its name in lower case but the following don't [",
          paste(features_names[!has_key], collapse = ", "), "]"
        ))
      }

      sensor_features <- merge(sensor_features, features, all = TRUE)
    }
  } else {
    for (feature in provider[["FEATURES"]]) {
      # The frame has zero rows here, so the NA filler must be sized to match;
      # assigning a bare NA raises "replacement has 1 row, data has 0".
      sensor_features[[feature]] <- rep(NA, nrow(sensor_features))
    }
  }

  sensor_features %>%
    separate(
      col = local_segment,
      into = c(
        "local_segment_label",
        "local_start_date", "local_start_time",
        "local_end_date", "local_end_time"
      ),
      sep = "#",
      remove = FALSE
    )
}