Add unify ios and android rule and script

replace/48fcc661ec076567ad67981a6b9b40af18fc5061
JulioV 2019-11-12 14:57:27 -05:00
parent 384f401062
commit bea791a106
2 changed files with 82 additions and 1 deletions

View File

@ -30,4 +30,15 @@ rule phone_valid_sensed_days:
output:
"data/interim/{pid}/phone_valid_sensed_days.csv"
script:
"../src/data/phone_valid_sensed_days.R"
"../src/data/phone_valid_sensed_days.R"
# Produce a platform-unified copy of one participant's sensor data by running
# unify_ios_android.R on the "_with_datetime" CSV for the {pid}/{sensor} pair.
# NOTE(review): the script only implements sensor == "calls"; confirm that no
# other sensor is ever requested through this rule.
rule unify_ios_android:
input:
# sensor readings to convert
sensor_data = "data/raw/{pid}/{sensor}_with_datetime.csv",
# participant file; the script reads the platform from its second line
participant_info = "data/external/{pid}"
params:
# forwards the {sensor} wildcard to the script as snakemake@params[["sensor"]]
sensor = "{sensor}"
output:
"data/raw/{pid}/{sensor}_with_datetime_unified.csv"
script:
"../src/data/unify_ios_android.R"

View File

@ -0,0 +1,70 @@
source("packrat/init.R")
library(dplyr)
unify_ios_calls <- function(ios_calls){
  # Collapse iOS call-status events into one Android-style row per call trace.
  #
  # Android call types: 1 = incoming, 2 = outgoing, 3 = missed
  # iOS call statuses:  1 = incoming, 2 = connected, 3 = dialing, 4 = disconnected
  #
  # Mapping from a trace's status sequence to the Android call type:
  #   1,2,4 (or 2,1,4) -> 1 (incoming)
  #   3,2,4 (or 2,3,4) -> 2 (outgoing)
  #   1,4   or 3,4     -> 3 (missed)
  # Sometimes (due to a possible bug in Aware) the events of a call are logged
  # on the exact same timestamp; ties are then ordered by status, which is why
  # an outgoing call can show up as 2,3,4 instead of 3,2,4.
  # iOS stores the ringing/dialing time of missed calls, but their duration is
  # set to 0 here to match Android.
  #
  # Args:
  #   ios_calls: data frame of iOS call events. Columns used: trace, timestamp,
  #     call_type, call_duration, utc_date_time, local_date_time, local_date,
  #     local_time, local_hour, local_minute, local_day_segment.
  #
  # Returns:
  #   One row per trace whose sequence matched one of the patterns above,
  #   sorted by timestamp. Columns: trace, call_type_sequence, call_duration,
  #   the time columns of the trace's first event, and the Android call_type.

  incoming_sequences <- c("1,2,4", "2,1,4")
  outgoing_sequences <- c("3,2,4", "2,3,4")

  events_by_trace <- ios_calls %>%
    arrange(trace, timestamp, call_type) %>%
    group_by(trace) %>%
    mutate(
      # flag the disconnect event, which is common to outgoing, received and
      # missed calls
      completed_call = ifelse(call_type == 4, 2, 0),
      # give the same ID to every event that precedes a disconnect
      completed_call = cumsum(c(1, head(completed_call, -1) != tail(completed_call, -1))),
      # hack: pull the ID of the disconnect row back to that of the rows before it
      completed_call = ifelse(call_type == 4, completed_call - 1, completed_call)
    )
  # NOTE(review): completed_call is not consumed below; summarise() works per
  # trace only and drops the column. Confirm whether grouping by it was intended.

  calls <- events_by_trace %>%
    summarise(
      # all statuses of the trace, in order, e.g. "1,2,4"
      call_type_sequence = paste(call_type, collapse = ","),
      # Summing assumes Android measures duration from dialing/ringing to hang
      # up. If Android measures from pick up to hang up, use
      # last(call_duration) instead. As a sanity check,
      # trunc((last(timestamp) - first(timestamp)) / 1000) should be equal or
      # close to this sum.
      call_duration = sum(call_duration),
      # every time column is taken from the first event of the trace
      timestamp = first(timestamp),
      utc_date_time = first(utc_date_time),
      local_date_time = first(local_date_time),
      local_date = first(local_date),
      local_time = first(local_time),
      local_hour = first(local_hour),
      local_minute = first(local_minute),
      local_day_segment = first(local_day_segment)
    )

  calls %>%
    mutate(call_type = case_when(
      call_type_sequence %in% incoming_sequences ~ 1, # incoming
      call_type_sequence == "1,4" ~ 3,                # incoming missed
      call_type_sequence %in% outgoing_sequences ~ 2, # outgoing
      call_type_sequence == "3,4" ~ 4,                # outgoing missed (temporary code)
      TRUE ~ -1                                       # anything else is discarded below
    )) %>%
    # both kinds of missed call get a duration of 0
    mutate(call_duration = ifelse(call_type %in% c(3, 4), 0, call_duration)) %>%
    # fold the temporary outgoing-missed code into Android's single missed type
    mutate(call_type = ifelse(call_type == 4, 3, call_type)) %>%
    # drop unrecognised sequences, e.g. those without a disconnect (4) event
    filter(call_type > 0) %>%
    ungroup() %>%
    arrange(timestamp)
}
# ---- Script entry point (executed through Snakemake's `script:` directive) ----
# Reads one participant's sensor CSV, converts iOS records to the Android
# format when needed, and writes the result to the rule's first output file.
# Android data is written back unchanged. Any sensor/platform combination that
# is not implemented stops with an explicit error instead of silently
# finishing without creating the output file.
sensor_data <- read.csv(snakemake@input[["sensor_data"]], stringsAsFactors = FALSE)
participant_info <- snakemake@input[["participant_info"]]
sensor <- snakemake@params[["sensor"]]

# The platform is read from the second line of the participant file.
participant_lines <- readLines(participant_info, n = 2)
if (length(participant_lines) < 2) {
  stop("Participant file '", participant_info,
       "' has fewer than two lines; cannot read the platform from line 2",
       call. = FALSE)
}
platform <- participant_lines[[2]]

# Only the calls sensor has a unification procedure so far.
if (sensor != "calls") {
  stop("Unifying sensor '", sensor, "' is not supported (only 'calls' is)",
       call. = FALSE)
}

if (platform == "ios") {
  sensor_data <- unify_ios_calls(sensor_data)
} else if (platform != "android") {
  stop("Unknown platform '", platform, "' in participant file '",
       participant_info, "' (expected 'android' or 'ios')", call. = FALSE)
}

write.csv(sensor_data, snakemake@output[[1]], row.names = FALSE)