From 234588c94dbe757fc370dec4a14dac7990183223 Mon Sep 17 00:00:00 2001
From: JulioV
Date: Mon, 26 Oct 2020 15:28:19 -0400
Subject: [PATCH] Expose episode threshold between rows parameter

---
Review notes (text between the "---" separator and the first "diff --git"
line is not part of the commit message):

* Both episode scripts used to hard-code their gap threshold (5 minutes for
  activity recognition, 30 minutes for battery). This patch reads the value,
  in minutes, from snakemake@params and converts it to milliseconds.
* The activity_recognition_episodes rule now reads its threshold from
  config["PHONE_ACTIVITY_RECOGNITION"] instead of config["PHONE_BATTERY"],
  so the two sensors stay independently configurable.
  NOTE(review): this patch does not touch config.yaml; confirm that
  EPISODE_THRESHOLD_BETWEEN_ROWS exists under both PHONE_ACTIVITY_RECOGNITION
  and PHONE_BATTERY before merging.
* The added R rescaling lines use `<-` instead of `=`; behaviour is unchanged.
* NOTE(review): indentation and blank context lines were reconstructed from a
  whitespace-collapsed copy of this patch. If context does not match, apply
  with `git apply --ignore-whitespace` or regenerate from the commit. The
  "index" hashes describe the original post-image and are informational only.

 rules/features.smk | 4 ++++
 .../episodes/activity_recognition_episodes.R | 10 +++++-----
 src/features/phone_battery/episodes/battery_episodes.R | 10 +++++-----
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/rules/features.smk b/rules/features.smk
index 75219830..b3f965f3 100644
--- a/rules/features.smk
+++ b/rules/features.smk
@@ -22,6 +22,8 @@ rule phone_accelerometer_python_features:
 rule activity_recognition_episodes:
     input:
         sensor_data = "data/raw/{pid}/phone_activity_recognition_with_datetime_unified.csv"
+    params:
+        episode_threshold_between_rows = config["PHONE_ACTIVITY_RECOGNITION"]["EPISODE_THRESHOLD_BETWEEN_ROWS"]
     output:
         "data/interim/{pid}/phone_activity_recognition_episodes.csv"
     script:
@@ -56,6 +58,8 @@ rule phone_applications_foreground_python_features:
 rule battery_episodes:
     input:
         "data/raw/{pid}/phone_battery_raw.csv"
+    params:
+        episode_threshold_between_rows = config["PHONE_BATTERY"]["EPISODE_THRESHOLD_BETWEEN_ROWS"]
     output:
         "data/interim/{pid}/phone_battery_episodes.csv"
     script:
diff --git a/src/features/phone_activity_recognition/episodes/activity_recognition_episodes.R b/src/features/phone_activity_recognition/episodes/activity_recognition_episodes.R
index 9a87b70f..3018264e 100644
--- a/src/features/phone_activity_recognition/episodes/activity_recognition_episodes.R
+++ b/src/features/phone_activity_recognition/episodes/activity_recognition_episodes.R
@@ -2,19 +2,19 @@ source("renv/activate.R")
 library("dplyr", warn.conflicts = F)
 
 activity_recognition <- read.csv(snakemake@input[[1]])
+episode_threshold_between_rows <- snakemake@params[["episode_threshold_between_rows"]]
 
 if(nrow(activity_recognition) > 0){
-    # TODO expose this in the config file
-    threshold_between_rows = 5 * 60000
+    episode_threshold_between_rows <- episode_threshold_between_rows * 60000
 
     ar_episodes <- activity_recognition %>%
         mutate(start_timestamp = timestamp, # a battery level starts as soon as is logged
             time_diff = (lead(timestamp) - start_timestamp), # lead diff
-            # we assume the current activity existed until the next row only if that row is logged within [threshold_between_rows] minutes
-            end_timestamp = if_else(is.na(time_diff) | time_diff > (threshold_between_rows), start_timestamp + (threshold_between_rows), lead(timestamp) - 1),
+            # we assume the current activity existed until the next row only if that row is logged within [episode_threshold_between_rows] minutes
+            end_timestamp = if_else(is.na(time_diff) | time_diff > (episode_threshold_between_rows), start_timestamp + (episode_threshold_between_rows), lead(timestamp) - 1),
             time_diff = c(1, diff(start_timestamp)), # lag diff
             type_diff = c(1, diff(activity_type)),
-            episode_id = cumsum(type_diff != 0 | time_diff > (threshold_between_rows))) %>%
+            episode_id = cumsum(type_diff != 0 | time_diff > (episode_threshold_between_rows))) %>%
         group_by(episode_id) %>%
         summarise(activity_name = first(activity_name), activity_type = first(activity_type), start_timestamp=first(start_timestamp), end_timestamp = last(end_timestamp))
 
diff --git a/src/features/phone_battery/episodes/battery_episodes.R b/src/features/phone_battery/episodes/battery_episodes.R
index 1b919b54..6e0d9db4 100644
--- a/src/features/phone_battery/episodes/battery_episodes.R
+++ b/src/features/phone_battery/episodes/battery_episodes.R
@@ -2,22 +2,22 @@ source("renv/activate.R")
 library("dplyr", warn.conflicts = F)
 
 battery <- read.csv(snakemake@input[[1]])
+episode_threshold_between_rows <- snakemake@params[["episode_threshold_between_rows"]]
 
 if(nrow(battery) > 0){
-    # TODO expose this in the config file
-    threshold_between_rows = 30 * 60000
+    episode_threshold_between_rows <- episode_threshold_between_rows * 60000
 
     battery_episodes <- battery %>%
         filter(battery_status >= 2 ) %>% # discard unknown states
         mutate(start_timestamp = timestamp, # a battery level starts as soon as is logged
             end_timestamp = lead(timestamp) - 1, # a battery level ends as soon as a new one is logged
             time_diff = (end_timestamp - start_timestamp),
-            # we assume the current level existed until the next row only if that row is logged within [threshold_between_rows] minutes
-            end_timestamp = if_else(is.na(time_diff) | time_diff > (threshold_between_rows), start_timestamp + (threshold_between_rows), end_timestamp)) %>%
+            # we assume the current level existed until the next row only if that row is logged within [episode_threshold_between_rows] minutes
+            end_timestamp = if_else(is.na(time_diff) | time_diff > (episode_threshold_between_rows), start_timestamp + (episode_threshold_between_rows), end_timestamp)) %>%
         mutate(time_diff = c(1, diff(start_timestamp)),
             level_diff = c(1, diff(battery_level)),
             status_diff = c(1, diff(battery_status)),
-            episode_id = cumsum(level_diff != 0 | status_diff != 0 | time_diff > (threshold_between_rows))) %>%
+            episode_id = cumsum(level_diff != 0 | status_diff != 0 | time_diff > (episode_threshold_between_rows))) %>%
         group_by(episode_id) %>%
         summarise(battery_level = first(battery_level), battery_status = first(battery_status), start_timestamp=first(start_timestamp), end_timestamp = last(end_timestamp))
 } else {