2020-11-25 01:12:16 +01:00
|
|
|
library("dplyr", warn.conflicts = F)
|
|
|
|
library(tidyr)
|
|
|
|
library(readr)
|
|
|
|
|
2020-12-03 00:41:03 +01:00
|
|
|
compute_data_yield_features <- function(data, feature_name, time_segment, provider){
|
|
|
|
data <- data %>% filter_data_by_segment(time_segment)
|
2021-04-05 22:57:05 +02:00
|
|
|
if(nrow(data) == 0)
|
2021-04-05 23:13:36 +02:00
|
|
|
return(tibble(local_segment = character(), ratiovalidyieldedminutes = numeric(), ratiovalidyieldedhours = numeric()))
|
2020-11-25 01:12:16 +01:00
|
|
|
features <- data %>%
|
|
|
|
separate(timestamps_segment, into = c("start_timestamp", "end_timestamp"), convert = T, sep = ",") %>%
|
|
|
|
mutate(duration_minutes = (end_timestamp - start_timestamp) / 60000,
|
|
|
|
timestamp_since_segment_start = timestamp - start_timestamp,
|
|
|
|
minute_bin = timestamp_since_segment_start %/% 60000, # 60 * 1000
|
|
|
|
hour_bin = timestamp_since_segment_start %/% 3600000) %>% # (60 * 60 * 1000)
|
|
|
|
group_by(local_segment, hour_bin) %>%
|
|
|
|
summarise(minute_count = n_distinct(minute_bin),
|
|
|
|
duration_minutes = first(duration_minutes),
|
|
|
|
valid_hour = (minute_count/min(duration_minutes, 60)) > provider$MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS) %>%
|
|
|
|
group_by(local_segment) %>%
|
|
|
|
summarise(valid_yielded_minutes = sum(minute_count),
|
|
|
|
valid_yielded_hours = sum(valid_hour == TRUE) / 1.0,
|
|
|
|
duration_minutes = first(duration_minutes),
|
|
|
|
duration_hours = duration_minutes / 60.0,
|
2021-08-06 18:32:16 +02:00
|
|
|
ratiovalidyieldedminutes = min( valid_yielded_minutes / duration_minutes, 1),
|
|
|
|
ratiovalidyieldedhours = if_else(duration_hours > 1, min( valid_yielded_hours / duration_hours, 1), valid_yielded_hours))
|
2020-11-25 01:12:16 +01:00
|
|
|
return(features)
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2020-12-03 00:41:03 +01:00
|
|
|
rapids_features <- function(sensor_data_files, time_segment, provider){
|
2020-11-25 01:12:16 +01:00
|
|
|
|
|
|
|
yield_data <- read_csv(sensor_data_files[["sensor_data"]], col_types = cols_only(timestamp ="d", assigned_segments = "c"))
|
|
|
|
requested_features <- provider[["FEATURES"]]
|
|
|
|
|
|
|
|
# Output dataframe
|
|
|
|
features = data.frame(local_segment = character(), stringsAsFactors = FALSE)
|
|
|
|
|
|
|
|
# The name of the features this function can compute
|
|
|
|
base_features_names <- c("ratiovalidyieldedminutes", "ratiovalidyieldedhours")
|
|
|
|
|
|
|
|
# The subset of requested features this function can compute
|
|
|
|
features_to_compute <- intersect(base_features_names, requested_features)
|
|
|
|
|
2020-12-03 00:41:03 +01:00
|
|
|
features <- compute_data_yield_features(yield_data, feature_name, time_segment, provider) %>%
|
2020-11-25 20:49:42 +01:00
|
|
|
select(c("local_segment", features_to_compute))
|
2020-11-25 01:12:16 +01:00
|
|
|
|
|
|
|
return(features)
|
|
|
|
}
|