# Patch summary (two R scripts under src/data/).
#
# NOTE(review): this patch text appears to have had its line breaks collapsed
# into a single line; restore the original newlines before trying to apply it.
#
# 1) src/data/phone_sensed_bins.R
#    - Adds library(lubridate).
#    - After ungroup(), converts local_date with lubridate::ymd() and calls
#      complete() over seq.Date(min(local_date), max(local_date), by="day"),
#      filling local_hour, bin and sensor_count with 0 for the dates it adds.
#      This runs before the pre-existing complete(nesting(local_date), ...)
#      call over local_hour/bin (that call is only partly visible here).
#    - NOTE(review): min()/max() of local_date on zero-row input looks like it
#      would make seq.Date() fail -- confirm the input is never empty.
#
# 2) src/data/phone_valid_sensed_days.R
#    - Removes filter(value > 0) and replaces summarise(valid_bins = n()) with
#      summarise(valid_bins = sum(value > 0)), so (local_date, hour) groups
#      with no positive values now yield valid_bins = 0 instead of being
#      dropped before the per-day summary.
#    - Presumably this keeps the zero-filled days added in (1) in the output
#      as non-valid days rather than omitting them -- TODO confirm intent.
#    - NOTE(review): sum(value > 0) has no na.rm, so an NA in value would give
#      valid_bins = NA -- confirm value cannot be NA after pivot_longer().
#    - NOTE(review): the unchanged ifelse(cond, TRUE, FALSE) context line is
#      equivalent to the bare condition; left as-is because it is outside
#      this patch's changes.
diff --git a/src/data/phone_sensed_bins.R b/src/data/phone_sensed_bins.R index 2d5664fa..de87a5f7 100644 --- a/src/data/phone_sensed_bins.R +++ b/src/data/phone_sensed_bins.R @@ -2,6 +2,7 @@ source("renv/activate.R") library(dplyr) library(tidyr) +library(lubridate) all_sensors <- snakemake@input[["all_sensors"]] bin_size <- snakemake@params[["bin_size"]] @@ -21,6 +22,9 @@ phone_sensed_bins <- all_sensor_data %>% group_by(local_date, local_hour, bin) %>% summarise(sensor_count = n_distinct(sensor)) %>% ungroup() %>% + mutate(local_date = lubridate::ymd(local_date)) %>% + complete(local_date = seq.Date(min(local_date), max(local_date), by="day"), + fill = list(local_hour = 0, bin = 0, sensor_count = 0)) %>% complete(nesting(local_date), local_hour = seq(0, 23, 1), bin = seq(0, (59 %/% bin_size) * bin_size, bin_size), diff --git a/src/data/phone_valid_sensed_days.R b/src/data/phone_valid_sensed_days.R index 649402dd..450baf70 100644 --- a/src/data/phone_valid_sensed_days.R +++ b/src/data/phone_valid_sensed_days.R @@ -9,9 +9,8 @@ output_file <- snakemake@output[[1]] phone_valid_sensed_days <- phone_sensed_bins %>% pivot_longer(cols = -local_date, names_to = c("hour", "bin"), names_sep = "_") %>% - filter(value > 0) %>% group_by(local_date, hour) %>% - summarise(valid_bins = n()) %>% + summarise(valid_bins = sum(value > 0)) %>% group_by(local_date) %>% summarise(valid_sensed_hours = sum(valid_bins >= min_valid_bins_per_hour)) %>% mutate(is_valid_sensed_day = ifelse(valid_sensed_hours >= min_valid_hours_per_day, TRUE, FALSE))