2020-08-28 19:53:00 +02:00
source ( " renv/activate.R" )
2020-10-23 16:41:00 +02:00
library ( " dplyr" , warn.conflicts = F )
2020-08-28 19:53:00 +02:00
library ( " stringr" )
2021-03-23 19:04:01 +01:00
library ( " lubridate" )
library ( " purrr" )
2020-08-28 19:53:00 +02:00
create_empty_file <- function ( requested_features ) {
return ( data.frame ( local_segment = character ( ) ,
2021-03-23 19:04:01 +01:00
hometime = numeric ( ) ,
disttravelled = numeric ( ) ,
rog = numeric ( ) ,
maxdiam = numeric ( ) ,
maxhomedist = numeric ( ) ,
siglocsvisited = numeric ( ) ,
avgflightlen = numeric ( ) ,
stdflightlen = numeric ( ) ,
avgflightdur = numeric ( ) ,
stdflightdur = numeric ( ) ,
probpause = numeric ( ) ,
siglocentropy = numeric ( ) ,
minsmissing = numeric ( ) ,
circdnrtn = numeric ( ) ,
wkenddayrtn = numeric ( ) ,
minutes_data_used = numeric ( )
) %>% select ( all_of ( requested_features ) ) )
}
summarise_multiday_segments <- function ( segments , features ) {
features <- features %>% mutate ( local_date = ymd ( local_date ) )
segments <- segments %>% extract ( col = local_segment ,
into = c ( " local_segment_start_datetime" , " local_segment_end_datetime" ) ,
" .*#(.*) .*,(.*) .*" ,
remove = FALSE ) %>%
mutate ( local_segment_start_datetime = ymd ( local_segment_start_datetime ) ,
local_segment_end_datetime = ymd ( local_segment_end_datetime ) ) %>%
group_by ( local_segment ) %>%
nest ( ) %>%
mutate ( data = map ( data , function ( nested_data , nested_features ) {
summary <- nested_features %>% filter ( local_date >= nested_data $ local_segment_start_datetime &
local_date <= nested_data $ local_segment_end_datetime )
if ( nrow ( summary ) > 0 )
summary <- summary %>%
summarise ( across ( c ( hometime , disttravelled , siglocsvisited , minutes_data_used ) , sum ) ,
across ( c ( maxdiam , maxhomedist ) , max ) ,
across ( c ( rog , avgflightlen , stdflightlen , avgflightdur , stdflightdur , probpause , siglocentropy , circdnrtn , wkenddayrtn , minsmissing ) , mean ) )
return ( summary )
} , features ) ) %>%
unnest ( cols = everything ( ) ) %>%
ungroup ( )
return ( segments )
2020-08-28 19:53:00 +02:00
}
2020-12-03 00:41:03 +01:00
barnett_features <- function ( sensor_data_files , time_segment , params ) {
2020-08-28 19:53:00 +02:00
location_features <- NULL
2021-03-23 19:04:01 +01:00
daily_features <- read.csv ( sensor_data_files [ [ " barnett_daily" ] ] , stringsAsFactors = FALSE )
location <- read.csv ( sensor_data_files [ [ " sensor_data" ] ] , stringsAsFactors = FALSE )
2020-08-28 19:53:00 +02:00
minutes_data_used <- params [ [ " MINUTES_DATA_USED" ] ]
available_features <- c ( " hometime" , " disttravelled" , " rog" , " maxdiam" , " maxhomedist" , " siglocsvisited" , " avgflightlen" , " stdflightlen" ,
2021-03-23 19:04:01 +01:00
" avgflightdur" , " stdflightdur" , " probpause" , " siglocentropy" , " circdnrtn" , " wkenddayrtn" )
2020-08-28 19:53:00 +02:00
requested_features <- intersect ( unlist ( params [ " FEATURES" ] , use.names = F ) , available_features )
2020-11-25 20:49:42 +01:00
requested_features <- c ( " local_segment" , requested_features )
2020-08-28 19:53:00 +02:00
if ( minutes_data_used )
2020-11-25 20:49:42 +01:00
requested_features <- c ( requested_features , " minutes_data_used" )
2021-03-23 19:04:01 +01:00
if ( nrow ( location ) > 0 & nrow ( daily_features ) > 0 ) {
2020-12-03 00:41:03 +01:00
location <- location %>% filter_data_by_segment ( time_segment )
2020-10-19 21:07:12 +02:00
datetime_start_regex = " [0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 00:00:00"
datetime_end_regex = " [0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 23:59:59"
2020-12-03 00:41:03 +01:00
location <- location %>% mutate ( is_daily = str_detect ( local_segment , paste0 ( time_segment , " #" , datetime_start_regex , " ," , datetime_end_regex ) ) )
2021-03-23 19:04:01 +01:00
if ( nrow ( location ) == 0 || ! all ( location $ is_daily ) ) {
message ( paste ( " Barnett's location features cannot be computed for data or time segmentes that do not span entire days (00:00:00 to 23:59:59). Skipping " , time_segment ) )
2020-08-28 19:53:00 +02:00
location_features <- create_empty_file ( requested_features )
} else {
2021-03-23 19:04:01 +01:00
location_dates_segments <- location %>% select ( local_segment ) %>% distinct ( local_segment , .keep_all = TRUE )
features <- summarise_multiday_segments ( location_dates_segments , daily_features )
location_features <- features %>% select ( all_of ( requested_features ) )
2020-08-28 19:53:00 +02:00
}
2021-03-23 19:04:01 +01:00
} else
location_features <- create_empty_file ( requested_features )
2020-08-28 19:53:00 +02:00
return ( location_features )
}