Change how we compute AR episodes

pull/103/head
JulioV 2020-09-22 17:32:51 -04:00
parent a851a460e4
commit 303d1472ed
4 changed files with 28 additions and 39 deletions

View File

@ -39,10 +39,10 @@ def optional_ar_input(wildcards):
if platform == "android":
return ["data/raw/{pid}/" + config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"] + "_with_datetime_unified.csv",
"data/processed/{pid}/" + config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"] + "_deltas.csv"]
"data/interim/{pid}/" + config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"] + "_episodes.csv"]
elif platform == "ios":
return ["data/raw/{pid}/"+config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]+"_with_datetime_unified.csv",
"data/processed/{pid}/"+config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]+"_deltas.csv"]
"data/interim/{pid}/"+config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]+"_episodes.csv"]
def optional_conversation_input(wildcards):
platform = infer_participant_platform("data/external/"+wildcards.pid)

View File

@ -99,17 +99,17 @@ rule google_activity_recognition_deltas:
input:
expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"])
output:
expand("data/processed/{{pid}}/{sensor}_deltas.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"])
expand("data/interim/{{pid}}/{sensor}_episodes.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"])
script:
"../src/features/activity_recognition_deltas.R"
"../src/features/ar/episodes/activity_recognition_episodes.R"
rule ios_activity_recognition_deltas:
input:
expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"])
output:
expand("data/processed/{{pid}}/{sensor}_deltas.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"])
expand("data/interim/{{pid}}/{sensor}_episodes.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"])
script:
"../src/features/activity_recognition_deltas.R"
"../src/features/ar/episodes/activity_recognition_episodes.R"
rule locations_python_features:
input:

View File

@ -1,33 +0,0 @@
source("renv/activate.R")
library("tidyverse")
# Collapse consecutive activity recognition rows that share the same
# activity_type into episodes, and write one summary row per episode to the
# first Snakemake output.
ar_rows <- read.csv(snakemake@input[[1]])

if (nrow(ar_rows) == 0) {
  # Nothing to summarise: emit an empty table that still carries the
  # episode columns so the output file has a header.
  activity_episodes <- data.frame(
    activity = character(),
    time_diff = numeric(),
    local_start_date_time = character(),
    local_end_date_time = character(),
    local_start_date = character(),
    local_end_date = character(),
    local_start_day_segment = character(),
    local_end_day_segment = character()
  )
} else {
  activity_episodes <- ar_rows %>%
    mutate(
      # 1 for the first row, then 1 every time activity_type differs from the
      # previous row; the running sum labels each run of equal types.
      activity_episode = cumsum(
        c(1, tail(activity_type, -1) != head(activity_type, -1))
      )
    ) %>%
    group_by(activity_episode) %>%
    # Runs made of a single row are discarded.
    filter(n() > 1) %>%
    summarise(
      activity = first(activity_name),
      # Span between first and last row of the run, divided by 60000
      # (minutes, assuming `timestamp` is in milliseconds -- TODO confirm).
      time_diff = (last(timestamp) - first(timestamp)) / 60000,
      local_start_date_time = first(local_date_time),
      local_end_date_time = last(local_date_time),
      local_start_date = first(local_date),
      local_end_date = last(local_date),
      local_start_day_segment = first(local_day_segment),
      local_end_day_segment = last(local_day_segment)
    ) %>%
    select(-activity_episode)
}

write.csv(activity_episodes, file = snakemake@output[[1]], row.names = FALSE)

View File

@ -0,0 +1,22 @@
source("renv/activate.R")
library("dplyr")
# Turn raw activity recognition (AR) rows into episodes and write them to the
# first Snakemake output.
#
# Every input row becomes one episode that starts at the row's own timestamp
# and ends 1 unit before the next row's timestamp, capped so that no episode
# lasts longer than `threshold_between_rows` minutes.
#
# Output columns (same in the empty and non-empty case):
#   episode_id, start_timestamp, end_timestamp, activity_type
activity_recognition <- read.csv(snakemake@input[[1]])

# Maximum episode length, in minutes.
threshold_between_rows <- 5

if (nrow(activity_recognition) > 0) {
  # The cap expressed in timestamp units. `timestamp` is treated as
  # milliseconds, matching the original `/ 1000 / 60` minute conversion.
  max_episode_length <- threshold_between_rows * 60 * 1000

  ar_episodes <- activity_recognition %>%
    mutate(
      start_timestamp = timestamp,
      # Previously the capped duration was computed into `time_diff` and then
      # discarded by select(), so the threshold never affected the output.
      # Apply the cap directly to end_timestamp instead.
      # lead() is NA for the last row and pmin() propagates it, so the final
      # episode keeps an NA end_timestamp, exactly as before.
      end_timestamp = pmin(
        lead(start_timestamp) - 1,
        start_timestamp + max_episode_length
      ),
      episode_id = seq_len(n())
    ) %>%
    select(episode_id, start_timestamp, end_timestamp, activity_type)
} else {
  # Keep the header identical to the non-empty branch so downstream readers
  # always find the same columns. Only the header is written for 0 rows, so
  # the column type chosen for activity_type here is a placeholder.
  ar_episodes <- data.frame(
    episode_id = integer(),
    start_timestamp = numeric(),
    end_timestamp = numeric(),
    activity_type = numeric()
  )
}

write.csv(ar_episodes, snakemake@output[[1]], row.names = FALSE)