Expose episode threshold between rows parameter

pull/103/head
JulioV 2020-10-26 15:28:19 -04:00
parent 2fe5a0d822
commit 234588c94d
3 changed files with 14 additions and 10 deletions

View File

@ -22,6 +22,8 @@ rule phone_accelerometer_python_features:
rule activity_recognition_episodes:
input:
sensor_data = "data/raw/{pid}/phone_activity_recognition_with_datetime_unified.csv"
params:
episode_threshold_between_rows = config["PHONE_BATTERY"]["EPISODE_THRESHOLD_BETWEEN_ROWS"]
output:
"data/interim/{pid}/phone_activity_recognition_episodes.csv"
script:
@ -56,6 +58,8 @@ rule phone_applications_foreground_python_features:
rule battery_episodes:
input:
"data/raw/{pid}/phone_battery_raw.csv"
params:
episode_threshold_between_rows = config["PHONE_BATTERY"]["EPISODE_THRESHOLD_BETWEEN_ROWS"]
output:
"data/interim/{pid}/phone_battery_episodes.csv"
script:

View File

@ -2,19 +2,19 @@ source("renv/activate.R")
library("dplyr", warn.conflicts = F)
activity_recognition <- read.csv(snakemake@input[[1]])
episode_threshold_between_rows <- snakemake@params[["episode_threshold_between_rows"]]
if(nrow(activity_recognition) > 0){
# TODO expose this in the config file
threshold_between_rows = 5 * 60000
episode_threshold_between_rows = episode_threshold_between_rows * 60000
ar_episodes <- activity_recognition %>%
mutate(start_timestamp = timestamp, # an activity starts as soon as it is logged
time_diff = (lead(timestamp) - start_timestamp), # lead diff
# we assume the current activity existed until the next row only if that row is logged within [threshold_between_rows] minutes
end_timestamp = if_else(is.na(time_diff) | time_diff > (threshold_between_rows), start_timestamp + (threshold_between_rows), lead(timestamp) - 1),
# we assume the current activity existed until the next row only if that row is logged within [episode_threshold_between_rows] minutes
end_timestamp = if_else(is.na(time_diff) | time_diff > (episode_threshold_between_rows), start_timestamp + (episode_threshold_between_rows), lead(timestamp) - 1),
time_diff = c(1, diff(start_timestamp)), # lag diff
type_diff = c(1, diff(activity_type)),
episode_id = cumsum(type_diff != 0 | time_diff > (threshold_between_rows))) %>%
episode_id = cumsum(type_diff != 0 | time_diff > (episode_threshold_between_rows))) %>%
group_by(episode_id) %>%
summarise(activity_name = first(activity_name), activity_type = first(activity_type), start_timestamp=first(start_timestamp), end_timestamp = last(end_timestamp))

View File

@ -2,22 +2,22 @@ source("renv/activate.R")
library("dplyr", warn.conflicts = F)
battery <- read.csv(snakemake@input[[1]])
episode_threshold_between_rows <- snakemake@params[["episode_threshold_between_rows"]]
if(nrow(battery) > 0){
# TODO expose this in the config file
threshold_between_rows = 30 * 60000
episode_threshold_between_rows = episode_threshold_between_rows * 60000
battery_episodes <- battery %>%
filter(battery_status >= 2 ) %>% # discard unknown states
mutate(start_timestamp = timestamp, # a battery level starts as soon as it is logged
end_timestamp = lead(timestamp) - 1, # a battery level ends as soon as a new one is logged
time_diff = (end_timestamp - start_timestamp),
# we assume the current level existed until the next row only if that row is logged within [threshold_between_rows] minutes
end_timestamp = if_else(is.na(time_diff) | time_diff > (threshold_between_rows), start_timestamp + (threshold_between_rows), end_timestamp)) %>%
# we assume the current level existed until the next row only if that row is logged within [episode_threshold_between_rows] minutes
end_timestamp = if_else(is.na(time_diff) | time_diff > (episode_threshold_between_rows), start_timestamp + (episode_threshold_between_rows), end_timestamp)) %>%
mutate(time_diff = c(1, diff(start_timestamp)),
level_diff = c(1, diff(battery_level)),
status_diff = c(1, diff(battery_status)),
episode_id = cumsum(level_diff != 0 | status_diff != 0 | time_diff > (threshold_between_rows))) %>%
episode_id = cumsum(level_diff != 0 | status_diff != 0 | time_diff > (episode_threshold_between_rows))) %>%
group_by(episode_id) %>%
summarise(battery_level = first(battery_level), battery_status = first(battery_status), start_timestamp=first(start_timestamp), end_timestamp = last(end_timestamp))
} else {