Update AR episode computation

2020-09-29 17:26:43 -04:00 · 2020-09-29 17:26:43 -04:00 · 39f6e3841c
parent 10b0386c8f
commit 39f6e3841c
1 changed files with 15 additions and 8 deletions
--- a/src/features/ar/episodes/activity_recognition_episodes.R
+++ b/src/features/ar/episodes/activity_recognition_episodes.R
@ -4,19 +4,26 @@ library("dplyr")
 activity_recognition <- read.csv(snakemake@input[[1]])
 if(nrow(activity_recognition) > 0){
-  threshold_between_rows = 5
+  # TODO expose this in the config file; compared directly against timestamp differences, so 5 * 60000 is presumably 5 minutes in milliseconds (confirm timestamp units)
  threshold_between_rows = 5 * 60000
  ar_episodes <- activity_recognition %>% 
-    mutate(start_timestamp = timestamp,
+  mutate(start_timestamp = timestamp, # an activity starts as soon as it is logged
-          end_timestamp = lead(start_timestamp) - 1,
+         time_diff = (lead(timestamp) - start_timestamp), # lead diff: gap from this row to the next row (NA on the last row)
-          time_diff = (end_timestamp - start_timestamp) / 1000 / 60,
+         # the current activity is assumed to last until just before the next row (lead(timestamp) - 1) only if that row is logged within threshold_between_rows (same units as timestamp, not minutes); otherwise, or on the last row, it is capped at start_timestamp + threshold_between_rows
-          time_diff = if_else(time_diff > threshold_between_rows, threshold_between_rows, time_diff),
+         end_timestamp = if_else(is.na(time_diff) | time_diff > (threshold_between_rows), start_timestamp + (threshold_between_rows), lead(timestamp) - 1), 
-          episode_id = 1:n()) %>%
+         time_diff = c(1, diff(start_timestamp)), # lag diff: gap from the previous row to this row (first row fixed to 1); overwrites the lead diff above
-    select(episode_id, start_timestamp, end_timestamp, activity_type)
+         type_diff = c(1, diff(activity_type)),
         episode_id = cumsum(type_diff != 0 | time_diff > (threshold_between_rows))) %>% 
  group_by(episode_id) %>%
  summarise(activity_name = first(activity_name), activity_type = first(activity_type), start_timestamp=first(start_timestamp), end_timestamp = last(end_timestamp))
 } else {
  ar_episodes <- data.frame(start_timestamp = numeric(), 
                            end_timestamp = numeric(),
-                            episode_id = numeric())
+                            episode_id = numeric(),
                            activity_type = numeric(),
                            activity_name = character())
 }
 write.csv(ar_episodes, snakemake@output[[1]], row.names = FALSE)